From fe06e2116b4a2c7fe08f88720973916932f84fcc Mon Sep 17 00:00:00 2001 From: Roope Astala Date: Mon, 17 Sep 2018 15:51:23 -0400 Subject: [PATCH] Update notebooks --- 00.configuration.ipynb | 245 + .../01.train-within-notebook.ipynb | 193 +- .../01.train-within-notebook/score.py | 27 + .../02.train-on-local/02.train-on-local.ipynb | 69 +- .../02.train-on-local/train.py | 0 .../03.train-on-aci/03.train-on-aci.ipynb | 325 ++ .../04.train-on-remote-vm.ipynb | 0 .../04.train-on-remote-vm/train.py | 0 .../05.train-in-spark/05.train-in-spark.ipynb | 0 .../05.train-in-spark/iris.csv | 0 .../05.train-in-spark/train-spark.py | 0 ...7.hyperdrive-with-sklearn-checkpoint.ipynb | 4031 +++++++++++++++++ .../07.hyperdrive-with-sklearn.ipynb | 15 +- .../diabetes_sklearn.py | 0 ...yperdrive-with-TensorFlow-checkpoint.ipynb | 3448 ++++++++++++++ .../08.hyperdrive-with-TensorFlow.ipynb | 14 +- .../mnist_test_images/img_1.jpg | Bin .../mnist_test_images/img_10.jpg | Bin .../mnist_test_images/img_2.jpg | Bin .../mnist_test_images/img_3.jpg | Bin .../mnist_test_images/img_4.jpg | Bin .../mnist_test_images/img_5.jpg | Bin .../mnist_test_images/img_6.jpg | Bin .../mnist_test_images/img_7.jpg | Bin .../mnist_test_images/img_8.jpg | Bin .../mnist_test_images/img_9.jpg | Bin .../tf_mnist_score.py | 0 .../tf_mnist_train.py | 0 ...er-model-create-image-deploy-service.ipynb | 0 .../sklearn_regression_model.pkl | Bin .../11.production-deploy-to-aks.ipynb | 0 .../sklearn_regression_model.pkl | Bin ...le-data-collection-for-models-in-aks.ipynb | 0 .../sklearn_regression_model.pkl | Bin automl/00.configuration.ipynb | 288 ++ automl/01.auto-ml-classification.ipynb | 406 ++ automl/02.auto-ml-regression.ipynb | 432 ++ automl/03.auto-ml-remote-execution.ipynb | 530 +++ automl/03b.auto-ml-remote-batchai.ipynb | 609 +++ ...uto-ml-remote-batchai-compute-target.ipynb | 527 +++ ...emote-execution-text-data-blob-store.ipynb | 545 +++ ...ing-data-Blacklist-Early-Termination.ipynb | 399 ++ 
....auto-ml-sparse-data-custom-cv-split.ipynb | 424 ++ .../07.auto-ml-exploring-previous-runs.ipynb | 329 ++ ...ote-execution-with-text-file-on-DSVM.ipynb | 541 +++ ...to-ml-classification-with-deployment.ipynb | 480 ++ automl/10.auto-ml-multi-output-example.ipynb | 286 ++ automl/11.auto-ml-sample-weight.ipynb | 262 ++ ...l-retrieve-the-training-sdk-versions.ipynb | 241 + automl/13.auto-ml-dataprep.ipynb | 570 +++ automl/README.md | 230 + automl/automl_env.yml | 22 + automl/automl_setup.cmd | 42 + automl/automl_setup_linux.sh | 34 + automl/automl_setup_mac.sh | 35 + tutorials/01.train-models.ipynb | 2 +- tutorials/02.deploy-models.ipynb | 43 +- 57 files changed, 15505 insertions(+), 139 deletions(-) create mode 100644 00.configuration.ipynb rename {00.Getting Started => 01.getting-started}/01.train-within-notebook/01.train-within-notebook.ipynb (80%) create mode 100644 01.getting-started/01.train-within-notebook/score.py rename {00.Getting Started => 01.getting-started}/02.train-on-local/02.train-on-local.ipynb (84%) rename {00.Getting Started => 01.getting-started}/02.train-on-local/train.py (100%) create mode 100644 01.getting-started/03.train-on-aci/03.train-on-aci.ipynb rename {00.Getting Started => 01.getting-started}/04.train-on-remote-vm/04.train-on-remote-vm.ipynb (100%) rename {00.Getting Started => 01.getting-started}/04.train-on-remote-vm/train.py (100%) rename {00.Getting Started => 01.getting-started}/05.train-in-spark/05.train-in-spark.ipynb (100%) rename {00.Getting Started => 01.getting-started}/05.train-in-spark/iris.csv (100%) rename {00.Getting Started => 01.getting-started}/05.train-in-spark/train-spark.py (100%) create mode 100644 01.getting-started/07.hyperdrive-with-sklearn/.ipynb_checkpoints/07.hyperdrive-with-sklearn-checkpoint.ipynb rename {00.Getting Started => 01.getting-started}/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb (99%) rename {00.Getting Started => 
01.getting-started}/07.hyperdrive-with-sklearn/diabetes_sklearn.py (100%) create mode 100644 01.getting-started/08.hyperdrive-with-TensorFlow/.ipynb_checkpoints/08.hyperdrive-with-TensorFlow-checkpoint.ipynb rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb (99%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_1.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_10.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_2.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_3.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_4.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_5.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_6.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_7.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_8.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/mnist_test_images/img_9.jpg (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/tf_mnist_score.py (100%) rename {00.Getting Started => 01.getting-started}/08.hyperdrive-with-TensorFlow/tf_mnist_train.py (100%) rename {00.Getting Started => 01.getting-started}/10.register-model-create-image-deploy-service/10.register-model-create-image-deploy-service.ipynb (100%) rename {00.Getting Started => 01.getting-started}/10.register-model-create-image-deploy-service/sklearn_regression_model.pkl (100%) rename {00.Getting 
Started => 01.getting-started}/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb (100%) rename {00.Getting Started => 01.getting-started}/11.production-deploy-to-aks/sklearn_regression_model.pkl (100%) rename {00.Getting Started => 01.getting-started}/12.enable-data-collection-for-models-in-aks/12.enable-data-collection-for-models-in-aks.ipynb (100%) rename {00.Getting Started => 01.getting-started}/12.enable-data-collection-for-models-in-aks/sklearn_regression_model.pkl (100%) create mode 100644 automl/00.configuration.ipynb create mode 100644 automl/01.auto-ml-classification.ipynb create mode 100644 automl/02.auto-ml-regression.ipynb create mode 100644 automl/03.auto-ml-remote-execution.ipynb create mode 100644 automl/03b.auto-ml-remote-batchai.ipynb create mode 100644 automl/03c.auto-ml-remote-batchai-compute-target.ipynb create mode 100644 automl/04.auto-ml-remote-execution-text-data-blob-store.ipynb create mode 100644 automl/05.auto-ml-missing-data-Blacklist-Early-Termination.ipynb create mode 100644 automl/06.auto-ml-sparse-data-custom-cv-split.ipynb create mode 100644 automl/07.auto-ml-exploring-previous-runs.ipynb create mode 100644 automl/08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb create mode 100644 automl/09.auto-ml-classification-with-deployment.ipynb create mode 100644 automl/10.auto-ml-multi-output-example.ipynb create mode 100644 automl/11.auto-ml-sample-weight.ipynb create mode 100644 automl/12.auto-ml-retrieve-the-training-sdk-versions.ipynb create mode 100644 automl/13.auto-ml-dataprep.ipynb create mode 100644 automl/README.md create mode 100644 automl/automl_env.yml create mode 100644 automl/automl_setup.cmd create mode 100644 automl/automl_setup_linux.sh create mode 100644 automl/automl_setup_mac.sh diff --git a/00.configuration.ipynb b/00.configuration.ipynb new file mode 100644 index 000000000..959fa1ae9 --- /dev/null +++ b/00.configuration.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 00. Installation and configuration\n", + "\n", + "## Prerequisites:\n", + "\n", + "### 1. Install Azure ML SDK\n", + "Follow [SDK installation instructions](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment).\n", + "\n", + "### 2. Install some additional packages\n", + "This Notebook requires some additional libraries. In the conda environment, run below commands: \n", + "```shell\n", + "(myenv) $ conda install -y matplotlib tqdm scikit-learn\n", + "```\n", + "\n", + "### 3. Make sure your subscription is registered to use ACI.\n", + "This Notebook makes use of Azure Container Instance (ACI). You need to ensure your subscription has been registered to use ACI in order to be able to deploy a dev/test web service.\n", + "```shell\n", + "# check to see if ACI is already registered\n", + "(myenv) $ az provider show -n Microsoft.ContainerInstance -o table\n", + "\n", + "# if ACI is not registered, run this command.\n", + "# note you need to be the subscription owner in order to execute this command successfully.\n", + "(myenv) $ az provider register -n Microsoft.ContainerInstance\n", + "```\n", + "\n", + "In this example you will optionally create an Azure Machine Learning Workspace and initialize your notebook directory to easily use this workspace. Typically you will only need to run this once per notebook directory, and all other notebooks in this directory or any sub-directories will automatically use the settings you indicate here.\n", + "\n", + "This notebook also contains optional cells to install and update the required Azure Machine Learning libraries."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "install" + ] + }, + "outputs": [], + "source": [ + "# Check core SDK version number for debugging purposes\n", + "import azureml.core\n", + "\n", + "print(\"SDK Version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize an Azure ML Workspace\n", + "### What is an Azure ML Workspace and why do I need one?\n", + "\n", + "An AML Workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an AML Workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, operationalization, and the monitoring of operationalized models.\n", + "\n", + "### What do I need\n", + "\n", + "In order to use an AML Workspace, first you need access to an Azure Subscription. You can [create your own](https://azure.microsoft.com/en-us/free/) or get your existing subscription information from the [Azure portal](https://portal.azure.com). Inside your subscription, you will need access to a _resource group_, which organizes Azure resources and provides a default region for the resources in a group. 
You can see what resource groups to which you have access, or create a new one in the [Azure portal](https://portal.azure.com)\n", + "\n", + "You can also easily create a new resource group using azure-cli.\n", + "\n", + "```sh\n", + "(myenv) $ az group create -n my_resource_group -l eastus2\n", + "```\n", + "\n", + "To create or access an Azure ML Workspace, you will need to import the AML library and the following information:\n", + "* A name for your workspace\n", + "* Your subscription id\n", + "* The resource group name " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Supported Azure Regions\n", + "Please specify the Azure subscription Id, resource group name, workspace name, and the region in which you want to create the workspace, for example \"eastus2\". " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "subscription_id = os.environ.get(\"SUBSCRIPTION_ID\", \"\")\n", + "resource_group = os.environ.get(\"RESOURCE_GROUP\", \"\")\n", + "workspace_name = os.environ.get(\"WORKSPACE_NAME\", \"\")\n", + "workspace_region = os.environ.get(\"WORKSPACE_REGION\", \"eastus2\") # or eastus2euap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating a workspace\n", + "If you already have access to an AML Workspace you want to use, you can skip this cell. Otherwise, this cell will create an AML workspace for you in a subscription provided you have the correct permissions.\n", + "\n", + "This will fail when:\n", + "1. You do not have permission to create a workspace in the resource group\n", + "2. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription\n", + "\n", + "If workspace creation fails, please work with your IT admin to provide you with the appropriate permissions or to provision the required resources." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "# import the Workspace class and check the azureml SDK version\n", + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.create(name = workspace_name,\n", + " subscription_id = subscription_id,\n", + " resource_group = resource_group, \n", + " location = workspace_region,\n", + " exist_ok = True)\n", + "ws.get_details()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configuring your local environment\n", + "You can validate that you have access to the specified workspace and write a configuration file to the default configuration location, `./aml_config/config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "ws = Workspace(workspace_name = workspace_name,\n", + " subscription_id = subscription_id,\n", + " resource_group = resource_group)\n", + "\n", + "# persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n", + "ws.write_config()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can then load the workspace from this config file from any notebook in the current directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "# load workspace configuration from ./aml_config/config.json file.\n", + "my_workspace = Workspace.from_config()\n", + "my_workspace.get_details()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a folder to host all sample projects\n", + "Lastly, create a folder where all the sample projects will be hosted."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "sample_projects_folder = './sample_projects'\n", + "\n", + "if not os.path.isdir(sample_projects_folder):\n", + " os.mkdir(sample_projects_folder)\n", + " \n", + "print('Sample projects will be created in {}.'.format(sample_projects_folder))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Success!\n", + "Great, you are ready to move on to the rest of the sample notebooks." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/01.train-within-notebook/01.train-within-notebook.ipynb b/01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb similarity index 80% rename from 00.Getting Started/01.train-within-notebook/01.train-within-notebook.ipynb rename to 01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb index 9dabc23c6..566dfea14 100644 --- a/00.Getting Started/01.train-within-notebook/01.train-within-notebook.ipynb +++ b/01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb @@ -109,7 +109,7 @@ "print('Workspace name: ' + ws.name, \n", " 'Azure region: ' + ws.location, \n", " 'Subscription id: ' + ws.subscription_id, \n", - " 'Resource group: ' + ws.resource_group, sep = '\\n')" + " 'Resource group: ' + ws.resource_group, sep='\\n')" ] }, { @@ -151,7 +151,7 @@ "\n", "X, y = load_diabetes(return_X_y = True)\n", "columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n", - "X_train, X_test, y_train, y_test = 
train_test_split(X, y, test_size = 0.2, random_state = 0)\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", "data = {\n", " \"train\":{\"X\": X_train, \"y\": y_train}, \n", " \"test\":{\"X\": X_test, \"y\": y_test}\n", @@ -173,10 +173,10 @@ "outputs": [], "source": [ "reg = Ridge(alpha = 0.03)\n", - "reg.fit(data['train']['X'], data['train']['y'])\n", + "reg.fit(X=data['train']['X'], y=data['train']['y'])\n", "preds = reg.predict(data['test']['X'])\n", - "print('Mean Squared Error is', mean_squared_error(preds, data['test']['y']))\n", - "joblib.dump(value = reg, filename = 'model.pkl');" + "print('Mean Squared Error is', mean_squared_error(data['test']['y'], preds))\n", + "joblib.dump(value=reg, filename='model.pkl');" ] }, { @@ -198,34 +198,35 @@ }, "outputs": [], "source": [ - "experiment = Experiment(workspace = ws, name = experiment_name)\n", + "experiment = Experiment(workspace=ws, name=experiment_name)\n", "run = experiment.start_logging()\n", + "\n", "run.tag(\"Description\",\"My first run!\")\n", "run.log('alpha', 0.03)\n", - "reg = Ridge(alpha = 0.03)\n", + "reg = Ridge(alpha=0.03)\n", "reg.fit(data['train']['X'], data['train']['y'])\n", "preds = reg.predict(data['test']['X'])\n", - "run.log('mse', mean_squared_error(preds, data['test']['y']))\n", - "joblib.dump(value = reg, filename = 'model.pkl')\n", - "run.upload_file(name = 'outputs/model.pkl', path_or_stream = './model.pkl')\n", + "run.log('mse', mean_squared_error(data['test']['y'], preds))\n", + "joblib.dump(value=reg, filename='model.pkl')\n", + "run.upload_file(name='outputs/model.pkl', path_or_stream='./model.pkl')\n", "\n", "run.complete()" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "run" + "We can browse to the recorded run. Please make sure you use Chrome to navigate the run history page." 
] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "We can browse to the recorded run. Please make sure you use Chrome to navigate the run history page." + "run" ] }, { @@ -239,12 +240,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [ - "local run", - "outputs upload" - ] - }, + "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", @@ -253,26 +249,23 @@ "\n", "model_name = \"model.pkl\"\n", "\n", - "# start a training run\n", - "root_run = experiment.start_logging()\n", - "\n", "# list of numbers from 0 to 1.0 with a 0.05 interval\n", "alphas = np.arange(0.0, 1.0, 0.05)\n", "\n", "# try a bunch of alpha values in a Linear Regression (Ridge) model\n", "for alpha in tqdm(alphas):\n", - " # create a bunch of child runs\n", - " with root_run.child_run(\"alpha-\" + str(alpha)) as run:\n", + " # create a bunch of runs, each train a model with a different alpha value\n", + " with experiment.start_logging() as run:\n", " # Use Ridge algorithm to build a regression model\n", " reg = Ridge(alpha=alpha)\n", - " reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n", - " preds = reg.predict(data[\"test\"][\"X\"])\n", - " mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", + " reg.fit(X=data[\"train\"][\"X\"], y=data[\"train\"][\"y\"])\n", + " preds = reg.predict(X=data[\"test\"][\"X\"])\n", + " mse = mean_squared_error(y_true=data[\"test\"][\"y\"], y_pred=preds)\n", "\n", " # log alpha, mean_squared_error and feature names in run history\n", - " run.log(\"alpha\", alpha)\n", - " run.log(\"mse\", mse)\n", - " run.log_list(\"columns\", columns)\n", + " run.log(name=\"alpha\", value=alpha)\n", + " run.log(name=\"mse\", value=mse)\n", + " run.log_list(name=\"columns\", value=columns)\n", "\n", " with open(model_name, \"wb\") as file:\n", " joblib.dump(value=reg, filename=file)\n", @@ -281,32 +274,7 @@ " run.upload_file(name=\"outputs/\" + 
model_name, path_or_stream=model_name)\n", "\n", " # now delete the serialized model from local folder since it is already uploaded to run history \n", - " os.remove(model_name)\n", - " \n", - "# Declare run completed\n", - "root_run.complete()\n", - "root_run_id = root_run.id\n", - "print (\"run id:\", root_run.id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now you can reconstruct this run object from captured run id in a different Notebook session." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "query history" - ] - }, - "outputs": [], - "source": [ - "rr = Run(experiment=experiment, run_id=root_run_id)" + " os.remove(path=model_name)" ] }, { @@ -314,21 +282,23 @@ "metadata": {}, "source": [ "## Select best model from the experiment\n", - "Load all child run metrics recursively from the experiment into a dictionary object." + "Load all experiment run metrics recursively from the experiment into a dictionary object." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "tags": [ - "query history", - "get metrics" - ] - }, + "metadata": {}, "outputs": [], "source": [ - "child_run_metrics = rr.get_metrics(recursive=True)" + "runs = {}\n", + "run_metrics = {}\n", + "\n", + "for r in tqdm(experiment.get_runs()):\n", + " metrics = r.get_metrics()\n", + " if 'mse' in metrics.keys():\n", + " runs[r.id] = r\n", + " run_metrics[r.id] = metrics" ] }, { @@ -344,10 +314,10 @@ "metadata": {}, "outputs": [], "source": [ - "best_run_id = min(child_run_metrics, key = lambda k: child_run_metrics[k]['mse'])\n", - "best_run = Run(experiment=experiment, run_id=best_run_id)\n", + "best_run_id = min(run_metrics, key = lambda k: run_metrics[k]['mse'])\n", + "best_run = runs[best_run_id]\n", "print('Best run is:', best_run_id)\n", - "print('Metrics:', child_run_metrics[best_run_id])" + "print('Metrics:', run_metrics[best_run_id])" ] }, { @@ -390,10 +360,10 @@ "import matplotlib\n", "import matplotlib.pyplot as plt\n", "\n", - "best_alpha = child_run_metrics[best_run_id]['alpha']\n", - "min_mse = child_run_metrics[best_run_id]['mse']\n", + "best_alpha = run_metrics[best_run_id]['alpha']\n", + "min_mse = run_metrics[best_run_id]['mse']\n", "\n", - "alpha_mse = np.array([(child_run_metrics[k]['alpha'], child_run_metrics[k]['mse']) for k in child_run_metrics.keys()])\n", + "alpha_mse = np.array([(run_metrics[k]['alpha'], run_metrics[k]['mse']) for k in run_metrics.keys()])\n", "sorted_alpha_mse = alpha_mse[alpha_mse[:,0].argsort()]\n", "\n", "plt.plot(sorted_alpha_mse[:,0], sorted_alpha_mse[:,1], 'r--')\n", @@ -498,23 +468,22 @@ }, "outputs": [], "source": [ - "model.download(target_dir='.')" + "# remove the model file if it is already on disk\n", + "if os.path.isfile('model.pkl'): \n", + " os.remove('model.pkl')\n", + "# download the model\n", + "model.download(target_dir=\"./\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Create scoring script\n", + "## Scoring 
script\n", "\n", - "The scoring script consists of two functions: `init` that is used to load the model to memory when starting the container, and `run` that makes the prediction when web service is called." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `%%writefile` cell magic is used to write the scoring function to a local file. Pay special attention to how the model is loaded in the `init()` function. When Docker image is built for this model, the actual model file is downloaded and placed on disk, and `get_model_path` function returns the local path where the model is placed." + "Now we are ready to build a Docker image and deploy the model in it as a web service. The first step is creating the scoring script. For convenience, we have created the scoring script for you. It is printed below as text, but you can also run `%pfile ./score.py` in a cell to show the file.\n", + "\n", + "The scoring script consists of two functions: `init` that is used to load the model to memory when starting the container, and `run` that makes the prediction when web service is called. Please pay special attention to how the model is loaded in the `init()` function. When Docker image is built for this model, the actual model file is downloaded and placed on disk, and `get_model_path` function returns the local path where the model is placed."
] }, { @@ -523,43 +492,17 @@ "metadata": {}, "outputs": [], "source": [ - "%%writefile score.py\n", - "import pickle\n", - "import json\n", - "import numpy as np\n", - "from sklearn.externals import joblib\n", - "from sklearn.linear_model import Ridge\n", - "from azureml.core.model import Model\n", - "\n", - "\n", - "def init():\n", - " global model\n", - " # note here \"best_model\" is the name of the model registered under the workspace\n", - " # this call should return the path to the model.pkl file on the local disk.\n", - " model_path = Model.get_model_path(model_name='best_model')\n", - " # deserialize the model file back into a sklearn model\n", - " model = joblib.load(model_path)\n", - "\n", - " \n", - "# note you can pass in multiple rows for scoring\n", - "def run(raw_data):\n", - " try:\n", - " data = json.loads(raw_data)['data']\n", - " data = np.array(data)\n", - " result = model.predict(data)\n", - " return json.dumps({\"result\": result.tolist()})\n", - " except Exception as e:\n", - " result = str(e)\n", - " return json.dumps({\"error\": result})\n" + "with open('./score.py', 'r') as scoring_script:\n", + " print(scoring_script.read())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Create conda dependency file\n", + "## Create environment dependency file\n", "\n", - "This `myenv.yml` file is used to specify which library dependencies to install on the web service. Note that the CondaDependencies API automatically adds necessary Azure ML dependencies." + "We need an environment dependency file `myenv.yml` to specify which libraries are needed by the scoring script when building the Docker image for web service deployment. We can manually create this file, or we can use the `CondaDependencies` API to automatically create this file."
] }, { @@ -572,27 +515,12 @@ "\n", "myenv = CondaDependencies()\n", "myenv.add_conda_package(\"scikit-learn\")\n", + "print(myenv.serialize_to_string())\n", "\n", "with open(\"myenv.yml\",\"w\") as f:\n", " f.write(myenv.serialize_to_string())" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "View the `myenv.yml` file written." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pfile myenv.yml" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -838,6 +766,13 @@ "%%time\n", "service.delete()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/01.getting-started/01.train-within-notebook/score.py b/01.getting-started/01.train-within-notebook/score.py new file mode 100644 index 000000000..166615947 --- /dev/null +++ b/01.getting-started/01.train-within-notebook/score.py @@ -0,0 +1,27 @@ +import pickle +import json +import numpy as np +from sklearn.externals import joblib +from sklearn.linear_model import Ridge +from azureml.core.model import Model + + +def init(): + global model + # note here "best_model" is the name of the model registered under the workspace + # this call should return the path to the model.pkl file on the local disk. 
+ model_path = Model.get_model_path(model_name='best_model') + # deserialize the model file back into a sklearn model + model = joblib.load(model_path) + + +# note you can pass in multiple rows for scoring +def run(raw_data): + try: + data = json.loads(raw_data)['data'] + data = np.array(data) + result = model.predict(data) + return json.dumps({"result": result.tolist()}) + except Exception as e: + result = str(e) + return json.dumps({"error": result}) diff --git a/00.Getting Started/02.train-on-local/02.train-on-local.ipynb b/01.getting-started/02.train-on-local/02.train-on-local.ipynb similarity index 84% rename from 00.Getting Started/02.train-on-local/02.train-on-local.ipynb rename to 01.getting-started/02.train-on-local/02.train-on-local.ipynb index e3ce4f9c0..4c2cea8e6 100644 --- a/00.Getting Started/02.train-on-local/02.train-on-local.ipynb +++ b/01.getting-started/02.train-on-local/02.train-on-local.ipynb @@ -311,6 +311,74 @@ "run" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Docker-based execution\n", + "**NOTE** You must have Docker engine installed locally in order to use this execution mode. You can also ask the system to pull down a Docker image and execute your scripts in it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "run_config_docker = RunConfiguration()\n", + "\n", + "run_config_docker.environment.python.user_managed_dependencies = False\n", + "run_config_docker.prepare_environment = True\n", + "run_config_docker.environment.docker.enabled = True\n", + "run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", + "\n", + "# Specify conda dependencies with scikit-learn\n", + "cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n", + "run_config_docker.environment.python.conda_dependencies = cd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Submit script to run in the system-managed environment\n", + "A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. 
But this conda environment is reused so long as you don't change the conda dependencies.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_docker)\n", + "run = exp.submit(src)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Get run history details\n", + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -408,7 +476,6 @@ } ], "metadata": { - "celltoolbar": "Edit Metadata", "kernelspec": { "display_name": "Python [default]", "language": "python", diff --git a/00.Getting Started/02.train-on-local/train.py b/01.getting-started/02.train-on-local/train.py similarity index 100% rename from 00.Getting Started/02.train-on-local/train.py rename to 01.getting-started/02.train-on-local/train.py diff --git a/01.getting-started/03.train-on-aci/03.train-on-aci.ipynb b/01.getting-started/03.train-on-aci/03.train-on-aci.ipynb new file mode 100644 index 000000000..4039b0b23 --- /dev/null +++ b/01.getting-started/03.train-on-aci/03.train-on-aci.ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 03. 
Train on Azure Container Instance (EXPERIMENTAL)\n", + "\n", + "* Create Workspace\n", + "* Create Project\n", + "* Create `train.py` in the project folder.\n", + "* Configure an ACI (Azure Container Instance) run\n", + "* Execute in ACI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create An Experiment\n", + "\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'train-on-aci'\n", + "experiment = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a folder to store the training script." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "script_folder = './samples/train-on-aci'\n", + "os.makedirs(script_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remote execution on ACI\n", + "\n", + "Use `%%writefile` magic to write training code to `train.py` file under the project folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $script_folder/train.py\n", + "\n", + "import os\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from azureml.core.run import Run\n", + "from sklearn.externals import joblib\n", + "\n", + "import numpy as np\n", + "\n", + "os.makedirs('./outputs', exist_ok=True)\n", + "\n", + "X, y = load_diabetes(return_X_y = True)\n", + "\n", + "run = Run.get_submitted_run()\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n", + "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", + " \"test\": {\"X\": X_test, \"y\": y_test}}\n", + "\n", + "# list of numbers from 0.0 to 1.0 with a 0.05 interval\n", + "alphas = np.arange(0.0, 1.0, 0.05)\n", + "\n", + "for alpha in alphas:\n", + " # Use Ridge algorithm to create a regression model\n", + " reg = Ridge(alpha = alpha)\n", + " reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n", + "\n", + " preds = reg.predict(data[\"test\"][\"X\"])\n", + " mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", + " run.log('alpha', alpha)\n", + " run.log('mse', mse)\n", + " \n", + " model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n", + " with open(model_file_name, \"wb\") as file:\n", + " joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n", 
+ "\n", + " print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure for using ACI\n", + "Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "# create a new runconfig object\n", + "run_config = RunConfiguration()\n", + "\n", + "# signal that you want to use ACI to execute script.\n", + "run_config.target = \"containerinstance\"\n", + "\n", + "# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n", + "run_config.container_instance.region = 'eastus'\n", + "\n", + "# set the ACI CPU and Memory \n", + "run_config.container_instance.cpu_cores = 1\n", + "run_config.container_instance.memory_gb = 2\n", + "\n", + "# enable Docker \n", + "run_config.environment.docker.enabled = True\n", + "\n", + "# set Docker base image to the default CPU-based image\n", + "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", + "#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n", + "\n", + "# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "# auto-prepare the Docker image when used for execution (if it is not already prepared)\n", + "run_config.auto_prepare_environment = True\n", + "\n", + "# specify CondaDependencies obj\n", + 
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit the Experiment\n", + "Finally, run the training job on the ACI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "aci" + ] + }, + "outputs": [], + "source": [ + "%%time \n", + "from azureml.core.script_run_config import ScriptRunConfig\n", + "\n", + "script_run_config = ScriptRunConfig(source_directory = script_folder,\n", + " script= 'train.py',\n", + " run_config = run_config)\n", + "\n", + "run = experiment.submit(script_run_config)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "aci" + ] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Shows output of the run on stdout.\n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "# Show run details\n", + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "get metrics" + ] + }, + "outputs": [], + "source": [ + "# get all metrics logged in the run\n", + "run.get_metrics()\n", + "metrics = run.get_metrics()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n", + " min(metrics['mse']), \n", + " metrics['alpha'][np.argmin(metrics['mse'])]\n", + "))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", +
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb b/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb similarity index 100% rename from 00.Getting Started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb rename to 01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb diff --git a/00.Getting Started/04.train-on-remote-vm/train.py b/01.getting-started/04.train-on-remote-vm/train.py similarity index 100% rename from 00.Getting Started/04.train-on-remote-vm/train.py rename to 01.getting-started/04.train-on-remote-vm/train.py diff --git a/00.Getting Started/05.train-in-spark/05.train-in-spark.ipynb b/01.getting-started/05.train-in-spark/05.train-in-spark.ipynb similarity index 100% rename from 00.Getting Started/05.train-in-spark/05.train-in-spark.ipynb rename to 01.getting-started/05.train-in-spark/05.train-in-spark.ipynb diff --git a/00.Getting Started/05.train-in-spark/iris.csv b/01.getting-started/05.train-in-spark/iris.csv similarity index 100% rename from 00.Getting Started/05.train-in-spark/iris.csv rename to 01.getting-started/05.train-in-spark/iris.csv diff --git a/00.Getting Started/05.train-in-spark/train-spark.py b/01.getting-started/05.train-in-spark/train-spark.py similarity index 100% rename from 00.Getting Started/05.train-in-spark/train-spark.py rename to 01.getting-started/05.train-in-spark/train-spark.py diff --git a/01.getting-started/07.hyperdrive-with-sklearn/.ipynb_checkpoints/07.hyperdrive-with-sklearn-checkpoint.ipynb b/01.getting-started/07.hyperdrive-with-sklearn/.ipynb_checkpoints/07.hyperdrive-with-sklearn-checkpoint.ipynb new file mode 100644 index 000000000..beec8dc59 --- /dev/null +++ b/01.getting-started/07.hyperdrive-with-sklearn/.ipynb_checkpoints/07.hyperdrive-with-sklearn-checkpoint.ipynb @@ -0,0 +1,4031 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 07. HyperDrive with scikit-learn\n", + "- Create Batch AI cluster\n", + "- Train on a single node\n", + "- Set up Hyperdrive\n", + "- Parameter sweep with Hyperdrive on Batch AI cluster\n", + "- Monitor parameter sweep runs with run history widget\n", + "- Find best model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create An Experiment\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'hyperdrive-with-sklearn'\n", + "experiment = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a folder to store the training script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "script_folder = './samples/hyperdrive-with-sklearn'\n", + "os.makedirs(script_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Provision New Cluster\n", + "Create a new Batch AI cluster using the following Python code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create mlc", + "batchai" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "# choose a name for your cluster\n", + "batchai_cluster_name = ws.name + \"cpu\"\n", + "\n", + "found = False\n", + "# see if this compute target already exists in the workspace\n", + "for ct in ws.compute_targets():\n", + " print(ct.name, ct.type)\n", + " if (ct.name == batchai_cluster_name and ct.type == 'BatchAI'):\n", + " found = True\n", + " print('found compute target. 
just use it.')\n", + " compute_target = ct\n", + " break\n", + " \n", + "if not found:\n", + " print('creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = True,\n", + " cluster_min_nodes = 1, \n", + " cluster_max_nodes = 4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws,batchai_cluster_name, provisioning_config)\n", + " \n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + " \n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + " print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ridge Regression with scikit-learn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from shutil import copyfile\n", + "# copy the diabetes_sklearn.py file to the project folder\n", + "copyfile('./diabetes_sklearn.py', os.path.join(script_folder, 'diabetes_sklearn.py'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# review the diabetes_sklearn.py file if you'd like\n", + "with open(os.path.join(script_folder, 'diabetes_sklearn.py'), 'r') as fin:\n", + " print (fin.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an estimator for the sklearn script\n", + "You can use an estimator pattern to run the script. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.estimator import Estimator\n", + "script_params = {\n", + " '--alpha': 0.1\n", + "}\n", + "\n", + "sk_est = Estimator(source_directory = script_folder,\n", + " script_params = script_params,\n", + " compute_target = compute_target,\n", + " entry_script = 'diabetes_sklearn.py',\n", + " conda_packages = ['scikit-learn'])\n", + " #custom_docker_base_image = 'ninghai/azureml:0.3') # use a custom image here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "batchai" + ] + }, + "outputs": [], + "source": [ + "# start the job\n", + "from azureml.core.experiment import Experiment\n", + "\n", + "run = experiment.submit(sk_est)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### View run details\n", + "**IMPORTANT**: please use Chrome to navigate the below URL." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "batchai" + ] + }, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "use notebook widget" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also check the Batch AI cluster and job status using az-cli commands:\n", + "\n", + "```shell\n", + "# check cluster status. You can see how many nodes are running.\n", + "$ az batchai cluster list\n", + "\n", + "# check job status. 
You can see how many jobs are running\n", + "$ az batchai job list\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Now Try a Hyperdrive run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive import *\n", + "\n", + "# parameter space to sweep over\n", + "ps = RandomParameterSampling(\n", + " {\n", + " \"alpha\": uniform(0.0, 1.0)\n", + " }\n", + ")\n", + "\n", + "# early termination policy\n", + "# check every 2 iterations and if the primary metric (mse) falls\n", + "# outside of the range of 10% of the best recorded run so far, terminate it.\n", + "etp = BanditPolicy(slack_factor = 0.1, evaluation_interval = 2)\n", + "\n", + "# Hyperdrive run configuration\n", + "hrc = HyperDriveRunConfig(\n", + " estimator = sk_est,\n", + " hyperparameter_sampling = ps,\n", + " policy = etp,\n", + " # metric to watch (for early termination)\n", + " primary_metric_name = 'mse',\n", + " # lower values of the primary metric are better\n", + " primary_metric_goal = PrimaryMetricGoal.MINIMIZE,\n", + " max_total_runs = 20,\n", + " max_concurrent_runs = 4,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hyperdrive run", + "batchai" + ] + }, + "outputs": [], + "source": [ + "# Start Hyperdrive run\n", + "\n", + "hr = experiment.submit(hrc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use a widget to show runs\n", + "Runs will automatically start to show in the following widget once rendered. You can keep the Notebook open and watch them \"grow\"."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "use notebook widget" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(hr).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note**: This is a sample image with 200 runs. Your result might look different.\n", + "![img](../images/hyperdrive-sklearn.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# check cluster status, pay attention to the # of running nodes\n", + "# !az batchai cluster list -o table\n", + "\n", + "# check the Batch AI job queue. Notice the Job name is the run history Id. Pay attention to the State of the job.\n", + "# !az batchai job list -o table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find best run\n", + "Wait until all Hyperdrive runs finish before running the below cells." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "hr.get_status()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history", + "get metrics" + ] + }, + "outputs": [], + "source": [ + "from tqdm import tqdm\n", + "\n", + "runs = {}\n", + "\n", + "for r in tqdm(hr.get_children()):\n", + " metrics = r.get_metrics()\n", + " if ('mse' in metrics.keys()):\n", + " runs[r.id] = metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "best_run_id = min(runs, key = lambda k: runs[k]['mse'])\n", + "best_run = runs[best_run_id]\n", + "print('Best Run: alpha = {0:.4f}, MSE = {1:.4f}'.format(best_run['alpha'], best_run['mse']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot the best run [Optional] \n", + "Note you will need to install `matplotlib` for this." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get metrics of alpha and mse for all runs\n", + "metrics = np.array([[runs[r]['alpha'], runs[r]['mse']] for r in runs])\n", + "\n", + "# sort the metrics by alpha values\n", + "metrics = np.array(sorted(metrics, key = lambda m: m[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.title('MSE over alpha', fontsize = 16)\n", + "\n", + "plt.plot(metrics[:,0], metrics[:,1], 'r--')\n", + "plt.plot(metrics[:,0], metrics[:,1], 'bo')\n", + "\n", + "plt.xlabel('alpha', fontsize = 14)\n", + "plt.ylabel('mean squared error', fontsize = 14)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "7cf278f65a36435fb03137ca56bcd263": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "83e0767b6c3a41a2833d0f8fcf690c72": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "aa3181a75ca34d729b0dce89e779ec0e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": 
"DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_train_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowHyperDriveRunsView", + "layout": "IPY_MODEL_7cf278f65a36435fb03137ca56bcd263", + "value": [ + { + "run_id": "hyperdrive-sklearn-diabetes_1526126138942", + "status": "Running", + "workbench_run_details_uri": "https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-e2e-rg%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2euapws/projects/hyperdrive-sklearn-diabetes/run-history/run-details/hyperdrive-sklearn-diabetes_1526126138942?type=HyperDrive" + }, + [ + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.411237364035508", + "created_time": "2018-05-12 12:12:51.261530+00:00", + "created_time_dt": "2018-05-12T12:12:51.261530", + "duration": "0:00:12", + "end_time": "2018-05-12 12:13:03.803382+00:00", + "hyperdrive_id": "8382", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8382_0beb029b", + "metric": 3295.672, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.409690853942327", + "created_time": "2018-05-12 13:13:23.950880+00:00", + "created_time_dt": "2018-05-12T13:13:23.950880", + "duration": "0:00:28", + "end_time": "2018-05-12 13:13:52.707230+00:00", + "hyperdrive_id": "8479", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8479_5832d5b1", + "metric": 3295.6743, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.417026424561565", + "created_time": "2018-05-12 12:08:27.465571+00:00", + "created_time_dt": "2018-05-12T12:08:27.465571", + "duration": "0:00:12", + "end_time": "2018-05-12 12:08:39.848811+00:00", + "hyperdrive_id": "8370", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8370_c9260eec", + "metric": 3295.6834, + "start_time": "None", + "status": 
"Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.417442688875759", + "created_time": "2018-05-12 13:03:26.849626+00:00", + "created_time_dt": "2018-05-12T13:03:26.849626", + "duration": "0:00:14", + "end_time": "2018-05-12 13:03:41.499999+00:00", + "hyperdrive_id": "8468", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8468_5e92ff25", + "metric": 3295.6854, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.40612547022086", + "created_time": "2018-05-12 12:40:16.219170+00:00", + "created_time_dt": "2018-05-12T12:40:16.219170", + "duration": "0:00:12", + "end_time": "2018-05-12 12:40:29.063299+00:00", + "hyperdrive_id": "8436", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8436_8d7667d7", + "metric": 3295.6882, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.404098690541524", + "created_time": "2018-05-12 12:28:12.532204+00:00", + "created_time_dt": "2018-05-12T12:28:12.532204", + "duration": "0:00:12", + "end_time": "2018-05-12 12:28:25.123133+00:00", + "hyperdrive_id": "8415", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8415_cd663398", + "metric": 3295.7016, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.422501486914154", + "created_time": "2018-05-12 12:06:04.978496+00:00", + "created_time_dt": "2018-05-12T12:06:04.978496", + "duration": "0:00:13", + "end_time": "2018-05-12 12:06:18.355669+00:00", + "hyperdrive_id": "8363", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8363_b1db4981", + "metric": 3295.7227, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.423906941816355", + "created_time": "2018-05-12 12:27:06.723050+00:00", + "created_time_dt": "2018-05-12T12:27:06.723050", + "duration": "0:00:14", + "end_time": "2018-05-12 
12:27:20.746252+00:00", + "hyperdrive_id": "8414", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8414_8f74b802", + "metric": 3295.7372, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.431026531225767", + "created_time": "2018-05-12 12:25:04.908855+00:00", + "created_time_dt": "2018-05-12T12:25:04.908855", + "duration": "0:00:14", + "end_time": "2018-05-12 12:25:19.602114+00:00", + "hyperdrive_id": "8409", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8409_5345e6b2", + "metric": 3295.8375, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.393032827195967", + "created_time": "2018-05-12 12:07:13.020312+00:00", + "created_time_dt": "2018-05-12T12:07:13.020312", + "duration": "0:00:34", + "end_time": "2018-05-12 12:07:47.944409+00:00", + "hyperdrive_id": "8367", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8367_c39107f9", + "metric": 3295.8465, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.436567624426189", + "created_time": "2018-05-12 13:04:49.897871+00:00", + "created_time_dt": "2018-05-12T13:04:49.897871", + "duration": "0:00:14", + "end_time": "2018-05-12 13:05:04.491673+00:00", + "hyperdrive_id": "8470", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8470_a73b2d7b", + "metric": 3295.9462, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.383629052679639", + "created_time": "2018-05-12 12:12:37.531581+00:00", + "created_time_dt": "2018-05-12T12:12:37.531581", + "duration": "0:00:12", + "end_time": "2018-05-12 12:12:50.210199+00:00", + "hyperdrive_id": "8381", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8381_b638e983", + "metric": 3296.0679, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 
0.382111348518354", + "created_time": "2018-05-12 11:55:54.563179+00:00", + "created_time_dt": "2018-05-12T11:55:54.563179", + "duration": "0:00:13", + "end_time": "2018-05-12 11:56:07.888796+00:00", + "hyperdrive_id": "8327", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8327_a045606f", + "metric": 3296.1124, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.381187002593777", + "created_time": "2018-05-12 12:09:12.703228+00:00", + "created_time_dt": "2018-05-12T12:09:12.703228", + "duration": "0:00:17", + "end_time": "2018-05-12 12:09:29.741640+00:00", + "hyperdrive_id": "8373", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8373_053f25c6", + "metric": 3296.1406, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.379109970937149", + "created_time": "2018-05-12 13:02:00.253981+00:00", + "created_time_dt": "2018-05-12T13:02:00.253981", + "duration": "0:00:12", + "end_time": "2018-05-12 13:02:12.909525+00:00", + "hyperdrive_id": "8466", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8466_a379787d", + "metric": 3296.2076, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.448204115660274", + "created_time": "2018-05-12 13:44:11.787530+00:00", + "created_time_dt": "2018-05-12T13:44:11.787530", + "duration": "0:00:13", + "end_time": "2018-05-12 13:44:25.111437+00:00", + "hyperdrive_id": "8514", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8514_3f6ef25a", + "metric": 3296.2587, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.448713062576673", + "created_time": "2018-05-12 12:49:55.612577+00:00", + "created_time_dt": "2018-05-12T12:49:55.612577", + "duration": "0:00:30", + "end_time": "2018-05-12 12:50:26.163813+00:00", + "hyperdrive_id": "8449", + "id": 
"hyperdrive-sklearn-diabetes_1526126138942_1056_8449_d0363c5b", + "metric": 3296.275, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.451812447966424", + "created_time": "2018-05-12 13:46:02.887675+00:00", + "created_time_dt": "2018-05-12T13:46:02.887675", + "duration": "0:00:13", + "end_time": "2018-05-12 13:46:15.984786+00:00", + "hyperdrive_id": "8515", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8515_131388fa", + "metric": 3296.3782, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.454660892639694", + "created_time": "2018-05-12 12:08:27.411796+00:00", + "created_time_dt": "2018-05-12T12:08:27.411796", + "duration": "0:00:45", + "end_time": "2018-05-12 12:09:12.989012+00:00", + "hyperdrive_id": "8372", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8372_94748f49", + "metric": 3296.4798, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.45772812795942", + "created_time": "2018-05-12 13:15:05.575428+00:00", + "created_time_dt": "2018-05-12T13:15:05.575428", + "duration": "0:00:28", + "end_time": "2018-05-12 13:15:34.181749+00:00", + "hyperdrive_id": "8481", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8481_7b4d7aae", + "metric": 3296.5964, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.459845797045424", + "created_time": "2018-05-12 12:14:11.140060+00:00", + "created_time_dt": "2018-05-12T12:14:11.140060", + "duration": "0:00:13", + "end_time": "2018-05-12 12:14:24.996486+00:00", + "hyperdrive_id": "8386", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8386_f396881b", + "metric": 3296.6811, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.366571747107722", + "created_time": "2018-05-12 
12:08:27.253377+00:00", + "created_time_dt": "2018-05-12T12:08:27.253377", + "duration": "0:00:29", + "end_time": "2018-05-12 12:08:56.307309+00:00", + "hyperdrive_id": "8371", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8371_cd392eb6", + "metric": 3296.7127, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.462046659150171", + "created_time": "2018-05-12 12:42:50.088268+00:00", + "created_time_dt": "2018-05-12T12:42:50.088268", + "duration": "0:00:29", + "end_time": "2018-05-12 12:43:19.725900+00:00", + "hyperdrive_id": "8440", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8440_2809b87a", + "metric": 3296.7729, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.466061327723322", + "created_time": "2018-05-12 13:32:48.214641+00:00", + "created_time_dt": "2018-05-12T13:32:48.214641", + "duration": "0:00:33", + "end_time": "2018-05-12 13:33:21.400760+00:00", + "hyperdrive_id": "8502", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8502_f26e1d7a", + "metric": 3296.9498, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.360764179714059", + "created_time": "2018-05-12 12:52:59.773606+00:00", + "created_time_dt": "2018-05-12T12:52:59.773606", + "duration": "0:00:29", + "end_time": "2018-05-12 12:53:29.269383+00:00", + "hyperdrive_id": "8454", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8454_d048fd67", + "metric": 3297.0072, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.357261239177847", + "created_time": "2018-05-12 12:00:06.195143+00:00", + "created_time_dt": "2018-05-12T12:00:06.195143", + "duration": "0:00:15", + "end_time": "2018-05-12 12:00:21.894647+00:00", + "hyperdrive_id": "8343", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8343_4b4ee27d", + 
"metric": 3297.2039, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.357032645286086", + "created_time": "2018-05-12 12:29:17.822255+00:00", + "created_time_dt": "2018-05-12T12:29:17.822255", + "duration": "0:00:13", + "end_time": "2018-05-12 12:29:31.531549+00:00", + "hyperdrive_id": "8418", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8418_17853174", + "metric": 3297.2173, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.478523449448336", + "created_time": "2018-05-12 12:11:12.128813+00:00", + "created_time_dt": "2018-05-12T12:11:12.128813", + "duration": "0:00:12", + "end_time": "2018-05-12 12:11:24.470896+00:00", + "hyperdrive_id": "8378", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8378_137b1616", + "metric": 3297.5751, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.478800785224639", + "created_time": "2018-05-12 13:48:14.367164+00:00", + "created_time_dt": "2018-05-12T13:48:14.367164", + "duration": "0:00:32", + "end_time": "2018-05-12 13:48:46.834752+00:00", + "hyperdrive_id": "8518", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8518_729c4598", + "metric": 3297.5903, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.344791270146422", + "created_time": "2018-05-12 13:00:23.018949+00:00", + "created_time_dt": "2018-05-12T13:00:23.018949", + "duration": "0:00:31", + "end_time": "2018-05-12 13:00:54.639071+00:00", + "hyperdrive_id": "8464", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8464_7857e9c2", + "metric": 3298.0249, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.338103667119868", + "created_time": "2018-05-12 13:21:55.155823+00:00", + "created_time_dt": "2018-05-12T13:21:55.155823", 
+ "duration": "0:00:30", + "end_time": "2018-05-12 13:22:26.141154+00:00", + "hyperdrive_id": "8489", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8489_b7ffca04", + "metric": 3298.5454, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.332097037995421", + "created_time": "2018-05-12 13:42:09.096805+00:00", + "created_time_dt": "2018-05-12T13:42:09.096805", + "duration": "0:00:18", + "end_time": "2018-05-12 13:42:27.483311+00:00", + "hyperdrive_id": "8511", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8511_ec3cc7c9", + "metric": 3299.0623, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.331939228048221", + "created_time": "2018-05-12 12:45:34.049683+00:00", + "created_time_dt": "2018-05-12T12:45:34.049683", + "duration": "0:00:14", + "end_time": "2018-05-12 12:45:48.102359+00:00", + "hyperdrive_id": "8444", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8444_46d2f35f", + "metric": 3299.0766, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.505093423597719", + "created_time": "2018-05-12 13:18:18.551188+00:00", + "created_time_dt": "2018-05-12T13:18:18.551188", + "duration": "0:00:13", + "end_time": "2018-05-12 13:18:31.915661+00:00", + "hyperdrive_id": "8486", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8486_c437b7f7", + "metric": 3299.2713, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.506815861512464", + "created_time": "2018-05-12 12:27:06.875070+00:00", + "created_time_dt": "2018-05-12T12:27:06.875070", + "duration": "0:00:12", + "end_time": "2018-05-12 12:27:19.538922+00:00", + "hyperdrive_id": "8413", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8413_a969f346", + "metric": 3299.3974, + "start_time": "None", + "status": "Completed" + }, + { + 
"arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.509599014162797", + "created_time": "2018-05-12 12:02:10.749542+00:00", + "created_time_dt": "2018-05-12T12:02:10.749542", + "duration": "0:00:22", + "end_time": "2018-05-12 12:02:32.904491+00:00", + "hyperdrive_id": "8351", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8351_08acc5b3", + "metric": 3299.605, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.511754766725497", + "created_time": "2018-05-12 12:31:33.630006+00:00", + "created_time_dt": "2018-05-12T12:31:33.630006", + "duration": "0:00:13", + "end_time": "2018-05-12 12:31:46.932013+00:00", + "hyperdrive_id": "8421", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8421_d66288a6", + "metric": 3299.7693, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.517357778454532", + "created_time": "2018-05-12 13:32:47.999414+00:00", + "created_time_dt": "2018-05-12T13:32:47.999414", + "duration": "0:00:16", + "end_time": "2018-05-12 13:33:04.221248+00:00", + "hyperdrive_id": "8501", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8501_f6988ef5", + "metric": 3300.2095, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.319331797643165", + "created_time": "2018-05-12 13:23:42.824307+00:00", + "created_time_dt": "2018-05-12T13:23:42.824307", + "duration": "0:00:14", + "end_time": "2018-05-12 13:23:56.961000+00:00", + "hyperdrive_id": "8491", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8491_f97ca369", + "metric": 3300.3225, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.319074585293429", + "created_time": "2018-05-12 13:32:34.003659+00:00", + "created_time_dt": "2018-05-12T13:32:34.003659", + "duration": "0:00:14", + "end_time": "2018-05-12 13:32:48.094689+00:00", + 
"hyperdrive_id": "8500", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8500_d491060c", + "metric": 3300.3502, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.318218570599629", + "created_time": "2018-05-12 12:05:05.927100+00:00", + "created_time_dt": "2018-05-12T12:05:05.927100", + "duration": "0:00:18", + "end_time": "2018-05-12 12:05:23.961623+00:00", + "hyperdrive_id": "8360", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8360_8d3ad717", + "metric": 3300.4432, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.310459620980281", + "created_time": "2018-05-12 13:01:59.639205+00:00", + "created_time_dt": "2018-05-12T13:01:59.639205", + "duration": "0:00:30", + "end_time": "2018-05-12 13:02:29.644690+00:00", + "hyperdrive_id": "8465", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8465_a642fe78", + "metric": 3301.3332, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.308825721895932", + "created_time": "2018-05-12 12:26:00.849703+00:00", + "created_time_dt": "2018-05-12T12:26:00.849703", + "duration": "0:00:14", + "end_time": "2018-05-12 12:26:14.900534+00:00", + "hyperdrive_id": "8411", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8411_59c695f4", + "metric": 3301.5318, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.306982685343673", + "created_time": "2018-05-12 13:25:29.647713+00:00", + "created_time_dt": "2018-05-12T13:25:29.647713", + "duration": "0:00:12", + "end_time": "2018-05-12 13:25:42.574230+00:00", + "hyperdrive_id": "8494", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8494_5b797969", + "metric": 3301.7606, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.306861470957928", + 
"created_time": "2018-05-12 12:04:02.669871+00:00", + "created_time_dt": "2018-05-12T12:04:02.669871", + "duration": "0:00:11", + "end_time": "2018-05-12 12:04:14.471005+00:00", + "hyperdrive_id": "8357", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8357_d468e6ae", + "metric": 3301.7758, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.539137316483953", + "created_time": "2018-05-12 13:15:05.900228+00:00", + "created_time_dt": "2018-05-12T13:15:05.900228", + "duration": "0:00:43", + "end_time": "2018-05-12 13:15:49.159519+00:00", + "hyperdrive_id": "8482", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8482_d29d0b57", + "metric": 3302.0981, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.54004647672626", + "created_time": "2018-05-12 12:32:49.211373+00:00", + "created_time_dt": "2018-05-12T12:32:49.211373", + "duration": "0:00:14", + "end_time": "2018-05-12 12:33:03.460382+00:00", + "hyperdrive_id": "8423", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8423_88f12348", + "metric": 3302.1828, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.543570780620328", + "created_time": "2018-05-12 12:41:33.851103+00:00", + "created_time_dt": "2018-05-12T12:41:33.851103", + "duration": "0:00:13", + "end_time": "2018-05-12 12:41:47.611145+00:00", + "hyperdrive_id": "8438", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8438_9402e974", + "metric": 3302.5156, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.289360077382501", + "created_time": "2018-05-12 12:06:44.051674+00:00", + "created_time_dt": "2018-05-12T12:06:44.051674", + "duration": "0:00:30", + "end_time": "2018-05-12 12:07:14.422703+00:00", + "hyperdrive_id": "8365", + "id": 
"hyperdrive-sklearn-diabetes_1526126138942_1056_8365_1677e84b", + "metric": 3304.2101, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.568847163331273", + "created_time": "2018-05-12 13:29:05.740663+00:00", + "created_time_dt": "2018-05-12T13:29:05.740663", + "duration": "0:00:44", + "end_time": "2018-05-12 13:29:49.857716+00:00", + "hyperdrive_id": "8497", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8497_7c3c2de2", + "metric": 3305.0952, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.283304106660311", + "created_time": "2018-05-12 12:17:26.855111+00:00", + "created_time_dt": "2018-05-12T12:17:26.855111", + "duration": "0:00:13", + "end_time": "2018-05-12 12:17:40.339685+00:00", + "hyperdrive_id": "8394", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8394_7d55e673", + "metric": 3305.1657, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.569536449260573", + "created_time": "2018-05-12 12:02:44.927994+00:00", + "created_time_dt": "2018-05-12T12:02:44.927994", + "duration": "0:00:21", + "end_time": "2018-05-12 12:03:06.734426+00:00", + "hyperdrive_id": "8352", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8352_3e2d86ef", + "metric": 3305.1701, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.571288125093237", + "created_time": "2018-05-12 12:23:04.038356+00:00", + "created_time_dt": "2018-05-12T12:23:04.038356", + "duration": "0:00:13", + "end_time": "2018-05-12 12:23:17.679064+00:00", + "hyperdrive_id": "8405", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8405_39ff6a35", + "metric": 3305.3615, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.571422319269393", + "created_time": "2018-05-12 
13:29:04.794872+00:00", + "created_time_dt": "2018-05-12T13:29:04.794872", + "duration": "0:00:14", + "end_time": "2018-05-12 13:29:19.297193+00:00", + "hyperdrive_id": "8498", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8498_d7b81242", + "metric": 3305.3762, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.277607345545865", + "created_time": "2018-05-12 12:01:28.481497+00:00", + "created_time_dt": "2018-05-12T12:01:28.481497", + "duration": "0:00:14", + "end_time": "2018-05-12 12:01:42.740559+00:00", + "hyperdrive_id": "8348", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8348_5aef9466", + "metric": 3306.1203, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.275710729664475", + "created_time": "2018-05-12 12:23:03.837019+00:00", + "created_time_dt": "2018-05-12T12:23:03.837019", + "duration": "0:00:13", + "end_time": "2018-05-12 12:23:17.610482+00:00", + "hyperdrive_id": "8406", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8406_223e6751", + "metric": 3306.4502, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.581090311656388", + "created_time": "2018-05-12 13:00:22.858936+00:00", + "created_time_dt": "2018-05-12T13:00:22.858936", + "duration": "0:00:14", + "end_time": "2018-05-12 13:00:37.036926+00:00", + "hyperdrive_id": "8463", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8463_6eabc2c8", + "metric": 3306.4598, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.584829683488196", + "created_time": "2018-05-12 12:16:37.017705+00:00", + "created_time_dt": "2018-05-12T12:16:37.017705", + "duration": "0:00:12", + "end_time": "2018-05-12 12:16:49.387251+00:00", + "hyperdrive_id": "8392", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8392_a2a09d84", + 
"metric": 3306.8908, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.586916241458208", + "created_time": "2018-05-12 12:55:57.923024+00:00", + "created_time_dt": "2018-05-12T12:55:57.923024", + "duration": "0:00:13", + "end_time": "2018-05-12 12:56:11.355696+00:00", + "hyperdrive_id": "8457", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8457_a77f0d12", + "metric": 3307.134, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.590137507820594", + "created_time": "2018-05-12 12:04:42.201135+00:00", + "created_time_dt": "2018-05-12T12:04:42.201135", + "duration": "0:00:26", + "end_time": "2018-05-12 12:05:08.306074+00:00", + "hyperdrive_id": "8358", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8358_dbe58158", + "metric": 3307.5135, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.594865526074826", + "created_time": "2018-05-12 12:29:17.694994+00:00", + "created_time_dt": "2018-05-12T12:29:17.694994", + "duration": "0:00:12", + "end_time": "2018-05-12 12:29:30.169420+00:00", + "hyperdrive_id": "8417", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8417_badf4e15", + "metric": 3308.079, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.597001755207444", + "created_time": "2018-05-12 12:32:49.199564+00:00", + "created_time_dt": "2018-05-12T12:32:49.199564", + "duration": "0:00:13", + "end_time": "2018-05-12 12:33:03.126949+00:00", + "hyperdrive_id": "8424", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8424_54ed03ea", + "metric": 3308.3377, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.598431037514433", + "created_time": "2018-05-12 12:37:43.626336+00:00", + "created_time_dt": "2018-05-12T12:37:43.626336", + 
"duration": "0:00:31", + "end_time": "2018-05-12 12:38:15.527333+00:00", + "hyperdrive_id": "8432", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8432_46629b39", + "metric": 3308.512, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.599236029376656", + "created_time": "2018-05-12 11:57:07.392595+00:00", + "created_time_dt": "2018-05-12T11:57:07.392595", + "duration": "0:00:41", + "end_time": "2018-05-12 11:57:48.659976+00:00", + "hyperdrive_id": "8333", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8333_52ae9b26", + "metric": 3308.6105, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.601084397755814", + "created_time": "2018-05-12 11:57:34.200139+00:00", + "created_time_dt": "2018-05-12T11:57:34.200139", + "duration": "0:01:23", + "end_time": "2018-05-12 11:58:58.143110+00:00", + "hyperdrive_id": "8335", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8335_cce61bd6", + "metric": 3308.8378, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.262097025735337", + "created_time": "2018-05-12 12:19:16.917910+00:00", + "created_time_dt": "2018-05-12T12:19:16.917910", + "duration": "0:00:13", + "end_time": "2018-05-12 12:19:30.324225+00:00", + "hyperdrive_id": "8397", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8397_dd81df0e", + "metric": 3309.0036, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.604705870252792", + "created_time": "2018-05-12 13:48:00.448137+00:00", + "created_time_dt": "2018-05-12T13:48:00.448137", + "duration": "0:00:13", + "end_time": "2018-05-12 13:48:14.174291+00:00", + "hyperdrive_id": "8516", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8516_b74e4c8b", + "metric": 3309.2874, + "start_time": "None", + "status": "Completed" + }, + { + 
"arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.260351772485082", + "created_time": "2018-05-12 12:40:16.140757+00:00", + "created_time_dt": "2018-05-12T12:40:16.140757", + "duration": "0:00:29", + "end_time": "2018-05-12 12:40:46.115691+00:00", + "hyperdrive_id": "8435", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8435_f6976bb9", + "metric": 3309.355, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.260086732484324", + "created_time": "2018-05-12 12:23:59.882354+00:00", + "created_time_dt": "2018-05-12T12:23:59.882354", + "duration": "0:00:14", + "end_time": "2018-05-12 12:24:14.855275+00:00", + "hyperdrive_id": "8408", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8408_9c5acb0f", + "metric": 3309.4088, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.258916574615307", + "created_time": "2018-05-12 13:36:21.596115+00:00", + "created_time_dt": "2018-05-12T13:36:21.596115", + "duration": "0:00:50", + "end_time": "2018-05-12 13:37:12.123919+00:00", + "hyperdrive_id": "8504", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8504_0c6a00e8", + "metric": 3309.6482, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.611064223594442", + "created_time": "2018-05-12 12:44:18.250013+00:00", + "created_time_dt": "2018-05-12T12:44:18.250013", + "duration": "0:00:29", + "end_time": "2018-05-12 12:44:47.391810+00:00", + "hyperdrive_id": "8442", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8442_abd96bbf", + "metric": 3310.0902, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.256461605043284", + "created_time": "2018-05-12 12:54:31.251204+00:00", + "created_time_dt": "2018-05-12T12:54:31.251204", + "duration": "0:00:29", + "end_time": "2018-05-12 12:55:00.767057+00:00", + 
"hyperdrive_id": "8455", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8455_b1c25c7c", + "metric": 3310.1585, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.612949039336068", + "created_time": "2018-05-12 12:41:33.890361+00:00", + "created_time_dt": "2018-05-12T12:41:33.890361", + "duration": "0:00:30", + "end_time": "2018-05-12 12:42:04.062774+00:00", + "hyperdrive_id": "8437", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8437_6c50e94e", + "metric": 3310.3315, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.255526064542372", + "created_time": "2018-05-12 13:21:55.194489+00:00", + "created_time_dt": "2018-05-12T13:21:55.194489", + "duration": "0:00:13", + "end_time": "2018-05-12 13:22:08.712290+00:00", + "hyperdrive_id": "8488", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8488_c295f3e2", + "metric": 3310.356, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.2549933776509", + "created_time": "2018-05-12 13:52:12.028365+00:00", + "created_time_dt": "2018-05-12T13:52:12.028365", + "duration": "0:00:13", + "end_time": "2018-05-12 13:52:25.432736+00:00", + "hyperdrive_id": "8520", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8520_0caa9d1d", + "metric": 3310.4692, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.614709777900403", + "created_time": "2018-05-12 13:18:18.431023+00:00", + "created_time_dt": "2018-05-12T13:18:18.431023", + "duration": "0:00:30", + "end_time": "2018-05-12 13:18:48.651732+00:00", + "hyperdrive_id": "8485", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8485_4590c651", + "metric": 3310.5581, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.253919017785107", + 
"created_time": "2018-05-12 13:39:56.741734+00:00", + "created_time_dt": "2018-05-12T13:39:56.741734", + "duration": "0:01:03", + "end_time": "2018-05-12 13:41:00.308668+00:00", + "hyperdrive_id": "8510", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8510_49564c20", + "metric": 3310.699, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.616902331561374", + "created_time": "2018-05-12 11:56:01.551240+00:00", + "created_time_dt": "2018-05-12T11:56:01.551240", + "duration": "0:01:12", + "end_time": "2018-05-12 11:57:13.590773+00:00", + "hyperdrive_id": "8330", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8330_f4b61ef0", + "metric": 3310.8422, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.25086514487739", + "created_time": "2018-05-12 13:36:21.728793+00:00", + "created_time_dt": "2018-05-12T13:36:21.728793", + "duration": "0:00:33", + "end_time": "2018-05-12 13:36:54.777053+00:00", + "hyperdrive_id": "8505", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8505_cea12178", + "metric": 3311.3645, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.621860856811298", + "created_time": "2018-05-12 12:33:54.903277+00:00", + "created_time_dt": "2018-05-12T12:33:54.903277", + "duration": "0:00:13", + "end_time": "2018-05-12 12:34:08.188676+00:00", + "hyperdrive_id": "8426", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8426_10eb543c", + "metric": 3311.4917, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.24900598383144", + "created_time": "2018-05-12 12:13:25.069763+00:00", + "created_time_dt": "2018-05-12T12:13:25.069763", + "duration": "0:00:25", + "end_time": "2018-05-12 12:13:50.748571+00:00", + "hyperdrive_id": "8383", + "id": 
"hyperdrive-sklearn-diabetes_1526126138942_1056_8383_c3046cd8", + "metric": 3311.7784, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.6241675621646", + "created_time": "2018-05-12 12:06:44.216070+00:00", + "created_time_dt": "2018-05-12T12:06:44.216070", + "duration": "0:00:12", + "end_time": "2018-05-12 12:06:56.959070+00:00", + "hyperdrive_id": "8366", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8366_f69aa45d", + "metric": 3311.7972, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.625703892831805", + "created_time": "2018-05-12 12:03:08.955336+00:00", + "created_time_dt": "2018-05-12T12:03:08.955336", + "duration": "0:00:31", + "end_time": "2018-05-12 12:03:40.562848+00:00", + "hyperdrive_id": "8353", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8353_a9ca61ba", + "metric": 3312.0018, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.628276137144992", + "created_time": "2018-05-12 13:10:08.091192+00:00", + "created_time_dt": "2018-05-12T13:10:08.091192", + "duration": "0:00:13", + "end_time": "2018-05-12 13:10:21.613075+00:00", + "hyperdrive_id": "8475", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8475_849afdb5", + "metric": 3312.3465, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.628845460109139", + "created_time": "2018-05-12 12:57:35.276994+00:00", + "created_time_dt": "2018-05-12T12:57:35.276994", + "duration": "0:00:13", + "end_time": "2018-05-12 12:57:48.651279+00:00", + "hyperdrive_id": "8459", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8459_6791263c", + "metric": 3312.4231, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.63976851775956", + "created_time": "2018-05-12 
13:11:47.176422+00:00", + "created_time_dt": "2018-05-12T13:11:47.176422", + "duration": "0:00:44", + "end_time": "2018-05-12 13:12:31.806029+00:00", + "hyperdrive_id": "8477", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8477_e415c4e4", + "metric": 3313.9173, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.64333587616801", + "created_time": "2018-05-12 11:58:56.044779+00:00", + "created_time_dt": "2018-05-12T11:58:56.044779", + "duration": "0:01:09", + "end_time": "2018-05-12 12:00:05.332499+00:00", + "hyperdrive_id": "8339", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8339_de3651e2", + "metric": 3314.4149, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.651378196123682", + "created_time": "2018-05-12 12:01:04.756275+00:00", + "created_time_dt": "2018-05-12T12:01:04.756275", + "duration": "0:00:53", + "end_time": "2018-05-12 12:01:58.228294+00:00", + "hyperdrive_id": "8346", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8346_bd6c414b", + "metric": 3315.5536, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.652689173629813", + "created_time": "2018-05-12 12:39:00.282391+00:00", + "created_time_dt": "2018-05-12T12:39:00.282391", + "duration": "0:00:12", + "end_time": "2018-05-12 12:39:13.084499+00:00", + "hyperdrive_id": "8434", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8434_8af5e86e", + "metric": 3315.7414, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.231071484600443", + "created_time": "2018-05-12 11:55:54.528694+00:00", + "created_time_dt": "2018-05-12T11:55:54.528694", + "duration": "0:00:29", + "end_time": "2018-05-12 11:56:23.827901+00:00", + "hyperdrive_id": "8328", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8328_21815c4b", + "metric": 
3316.1244, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.227069222503092", + "created_time": "2018-05-12 12:35:12.203475+00:00", + "created_time_dt": "2018-05-12T12:35:12.203475", + "duration": "0:00:13", + "end_time": "2018-05-12 12:35:25.412676+00:00", + "hyperdrive_id": "8428", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8428_4bc7ce29", + "metric": 3317.1849, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.223418940687153", + "created_time": "2018-05-12 12:58:56.432059+00:00", + "created_time_dt": "2018-05-12T12:58:56.432059", + "duration": "0:00:14", + "end_time": "2018-05-12 12:59:10.619204+00:00", + "hyperdrive_id": "8461", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8461_cd6afd47", + "metric": 3318.1821, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.670450207932727", + "created_time": "2018-05-12 11:59:52.831246+00:00", + "created_time_dt": "2018-05-12T11:59:52.831246", + "duration": "0:00:44", + "end_time": "2018-05-12 12:00:37.246166+00:00", + "hyperdrive_id": "8342", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8342_e752fb63", + "metric": 3318.3439, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.222216466573058", + "created_time": "2018-05-12 12:10:27.604728+00:00", + "created_time_dt": "2018-05-12T12:10:27.604728", + "duration": "0:02:02", + "end_time": "2018-05-12 12:12:30.373183+00:00", + "hyperdrive_id": "8377", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8377_75383e3d", + "metric": 3318.517, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.222056103824308", + "created_time": "2018-05-12 12:01:04.816005+00:00", + "created_time_dt": "2018-05-12T12:01:04.816005", + 
"duration": "0:00:23", + "end_time": "2018-05-12 12:01:28.176392+00:00", + "hyperdrive_id": "8347", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8347_796e4acb", + "metric": 3318.5619, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.672122563818468", + "created_time": "2018-05-12 12:49:55.647218+00:00", + "created_time_dt": "2018-05-12T12:49:55.647218", + "duration": "0:00:13", + "end_time": "2018-05-12 12:50:09.437568+00:00", + "hyperdrive_id": "8450", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8450_4d73ae11", + "metric": 3318.5944, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.674219319793153", + "created_time": "2018-05-12 12:06:19.493627+00:00", + "created_time_dt": "2018-05-12T12:06:19.493627", + "duration": "0:00:14", + "end_time": "2018-05-12 12:06:33.612828+00:00", + "hyperdrive_id": "8364", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8364_616bf29d", + "metric": 3318.9097, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.676816186158726", + "created_time": "2018-05-12 11:57:20.761449+00:00", + "created_time_dt": "2018-05-12T11:57:20.761449", + "duration": "0:00:45", + "end_time": "2018-05-12 11:58:06.670890+00:00", + "hyperdrive_id": "8334", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8334_59867097", + "metric": 3319.3021, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.677040806205791", + "created_time": "2018-05-12 12:42:49.923029+00:00", + "created_time_dt": "2018-05-12T12:42:49.923029", + "duration": "0:00:14", + "end_time": "2018-05-12 12:43:04.705488+00:00", + "hyperdrive_id": "8439", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8439_ef9bc000", + "metric": 3319.3362, + "start_time": "None", + "status": "Completed" + }, + { + 
"arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.217998990024581", + "created_time": "2018-05-12 13:07:59.395985+00:00", + "created_time_dt": "2018-05-12T13:07:59.395985", + "duration": "0:00:46", + "end_time": "2018-05-12 13:08:45.534751+00:00", + "hyperdrive_id": "8472", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8472_350a8911", + "metric": 3319.7167, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.6799503791279", + "created_time": "2018-05-12 13:04:49.831744+00:00", + "created_time_dt": "2018-05-12T13:04:49.831744", + "duration": "0:00:30", + "end_time": "2018-05-12 13:05:20.589562+00:00", + "hyperdrive_id": "8469", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8469_8ff01e12", + "metric": 3319.7786, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.685038562031804", + "created_time": "2018-05-12 12:01:57.377138+00:00", + "created_time_dt": "2018-05-12T12:01:57.377138", + "duration": "0:00:18", + "end_time": "2018-05-12 12:02:16.176403+00:00", + "hyperdrive_id": "8349", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8349_9dd0edcd", + "metric": 3320.5588, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.213186744612845", + "created_time": "2018-05-12 11:59:28.796665+00:00", + "created_time_dt": "2018-05-12T11:59:28.796665", + "duration": "0:00:20", + "end_time": "2018-05-12 11:59:48.832949+00:00", + "hyperdrive_id": "8341", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8341_8a47e3c3", + "metric": 3321.1344, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.21256169431142", + "created_time": "2018-05-12 12:14:55.830300+00:00", + "created_time_dt": "2018-05-12T12:14:55.830300", + "duration": "0:00:13", + "end_time": "2018-05-12 12:15:08.838193+00:00", + 
"hyperdrive_id": "8387", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8387_4de71177", + "metric": 3321.3224, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.691031495877125", + "created_time": "2018-05-12 11:59:14.526241+00:00", + "created_time_dt": "2018-05-12T11:59:14.526241", + "duration": "0:00:17", + "end_time": "2018-05-12 11:59:31.757101+00:00", + "hyperdrive_id": "8340", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8340_f729e8a8", + "metric": 3321.4879, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.211515835622294", + "created_time": "2018-05-12 13:56:22.463994+00:00", + "created_time_dt": "2018-05-12T13:56:22.463994", + "duration": "0:00:14", + "end_time": "2018-05-12 13:56:36.879820+00:00", + "hyperdrive_id": "8526", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8526_fc03d679", + "metric": 3321.639, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.694531075877101", + "created_time": "2018-05-12 13:03:26.709852+00:00", + "created_time_dt": "2018-05-12T13:03:26.709852", + "duration": "0:00:30", + "end_time": "2018-05-12 13:03:57.298099+00:00", + "hyperdrive_id": "8467", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8467_820bf267", + "metric": 3322.0354, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.696647020776985", + "created_time": "2018-05-12 12:17:26.399107+00:00", + "created_time_dt": "2018-05-12T12:17:26.399107", + "duration": "0:00:13", + "end_time": "2018-05-12 12:17:39.855186+00:00", + "hyperdrive_id": "8393", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8393_e269fd60", + "metric": 3322.3682, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.207668270269089", + 
"created_time": "2018-05-12 12:23:59.802255+00:00", + "created_time_dt": "2018-05-12T12:23:59.802255", + "duration": "0:00:14", + "end_time": "2018-05-12 12:24:13.954962+00:00", + "hyperdrive_id": "8407", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8407_f600791c", + "metric": 3322.8255, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.704450171518138", + "created_time": "2018-05-12 12:51:33.047182+00:00", + "created_time_dt": "2018-05-12T12:51:33.047182", + "duration": "0:00:13", + "end_time": "2018-05-12 12:51:46.189745+00:00", + "hyperdrive_id": "8452", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8452_7a96c125", + "metric": 3323.6069, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.204555124314782", + "created_time": "2018-05-12 11:55:54.722551+00:00", + "created_time_dt": "2018-05-12T11:55:54.722551", + "duration": "0:00:45", + "end_time": "2018-05-12 11:56:40.533180+00:00", + "hyperdrive_id": "8329", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8329_3f426453", + "metric": 3323.811, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.203734687408454", + "created_time": "2018-05-12 12:07:13.094366+00:00", + "created_time_dt": "2018-05-12T12:07:13.094366", + "duration": "0:00:18", + "end_time": "2018-05-12 12:07:31.308677+00:00", + "hyperdrive_id": "8368", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8368_cb6c8945", + "metric": 3324.0745, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.709476658707224", + "created_time": "2018-05-12 12:11:12.061752+00:00", + "created_time_dt": "2018-05-12T12:11:12.061752", + "duration": "0:00:28", + "end_time": "2018-05-12 12:11:40.328129+00:00", + "hyperdrive_id": "8379", + "id": 
"hyperdrive-sklearn-diabetes_1526126138942_1056_8379_c9ae195c", + "metric": 3324.4139, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.202618610645979", + "created_time": "2018-05-12 12:52:59.522663+00:00", + "created_time_dt": "2018-05-12T12:52:59.522663", + "duration": "0:00:13", + "end_time": "2018-05-12 12:53:12.744129+00:00", + "hyperdrive_id": "8453", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8453_2704afe0", + "metric": 3324.4356, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.710489985561568", + "created_time": "2018-05-12 12:13:25.214412+00:00", + "created_time_dt": "2018-05-12T12:13:25.214412", + "duration": "0:00:13", + "end_time": "2018-05-12 12:13:38.794172+00:00", + "hyperdrive_id": "8384", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8384_29ac75d7", + "metric": 3324.5775, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.201388195391872", + "created_time": "2018-05-12 12:11:57.134173+00:00", + "created_time_dt": "2018-05-12T12:11:57.134173", + "duration": "0:00:12", + "end_time": "2018-05-12 12:12:09.855865+00:00", + "hyperdrive_id": "8380", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8380_0393567a", + "metric": 3324.8372, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.199904360954639", + "created_time": "2018-05-12 12:05:06.281666+00:00", + "created_time_dt": "2018-05-12T12:05:06.281666", + "duration": "0:00:35", + "end_time": "2018-05-12 12:05:41.415671+00:00", + "hyperdrive_id": "8361", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8361_790554e0", + "metric": 3325.3264, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.715946699073426", + "created_time": "2018-05-12 
13:50:03.986351+00:00", + "created_time_dt": "2018-05-12T13:50:03.986351", + "duration": "0:00:13", + "end_time": "2018-05-12 13:50:17.671869+00:00", + "hyperdrive_id": "8519", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8519_b45b95fe", + "metric": 3325.4631, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.716295824315661", + "created_time": "2018-05-12 12:22:03.607952+00:00", + "created_time_dt": "2018-05-12T12:22:03.607952", + "duration": "0:00:12", + "end_time": "2018-05-12 12:22:16.329865+00:00", + "hyperdrive_id": "8403", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8403_111315cc", + "metric": 3325.5201, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.721309232403544", + "created_time": "2018-05-12 12:09:47.802542+00:00", + "created_time_dt": "2018-05-12T12:09:47.802542", + "duration": "0:00:12", + "end_time": "2018-05-12 12:10:00.639613+00:00", + "hyperdrive_id": "8375", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8375_083d89cd", + "metric": 3326.3413, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.722171183492785", + "created_time": "2018-05-12 13:56:22.539569+00:00", + "created_time_dt": "2018-05-12T13:56:22.539569", + "duration": "0:00:29", + "end_time": "2018-05-12 13:56:52.506291+00:00", + "hyperdrive_id": "8524", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8524_80f88880", + "metric": 3326.4832, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.726313386025429", + "created_time": "2018-05-12 12:44:18.163495+00:00", + "created_time_dt": "2018-05-12T12:44:18.163495", + "duration": "0:00:13", + "end_time": "2018-05-12 12:44:31.168855+00:00", + "hyperdrive_id": "8441", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8441_020fdf69", + 
"metric": 3327.1678, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.72930361168177", + "created_time": "2018-05-12 13:25:30.013247+00:00", + "created_time_dt": "2018-05-12T13:25:30.013247", + "duration": "0:00:50", + "end_time": "2018-05-12 13:26:20.789242+00:00", + "hyperdrive_id": "8492", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8492_c360744c", + "metric": 3327.6647, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.735331084674993", + "created_time": "2018-05-12 12:19:17.001645+00:00", + "created_time_dt": "2018-05-12T12:19:17.001645", + "duration": "0:00:12", + "end_time": "2018-05-12 12:19:29.055472+00:00", + "hyperdrive_id": "8398", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8398_6057c4b8", + "metric": 3328.6733, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.741333101612688", + "created_time": "2018-05-12 12:30:24.252672+00:00", + "created_time_dt": "2018-05-12T12:30:24.252672", + "duration": "0:00:16", + "end_time": "2018-05-12 12:30:40.316267+00:00", + "hyperdrive_id": "8420", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8420_6adb8838", + "metric": 3329.6868, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.742368394312335", + "created_time": "2018-05-12 13:48:00.662005+00:00", + "created_time_dt": "2018-05-12T13:48:00.662005", + "duration": "0:00:29", + "end_time": "2018-05-12 13:48:29.733162+00:00", + "hyperdrive_id": "8517", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8517_01c069cd", + "metric": 3329.8625, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.74451533044112", + "created_time": "2018-05-12 12:00:06.297211+00:00", + "created_time_dt": "2018-05-12T12:00:06.297211", + 
"duration": "0:01:05", + "end_time": "2018-05-12 12:01:11.612251+00:00", + "hyperdrive_id": "8344", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8344_2036cd0a", + "metric": 3330.2276, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.750274984006123", + "created_time": "2018-05-12 12:39:00.160319+00:00", + "created_time_dt": "2018-05-12T12:39:00.160319", + "duration": "0:00:29", + "end_time": "2018-05-12 12:39:29.832125+00:00", + "hyperdrive_id": "8433", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8433_2238faf6", + "metric": 3331.2128, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.757819319976897", + "created_time": "2018-05-12 12:21:08.554079+00:00", + "created_time_dt": "2018-05-12T12:21:08.554079", + "duration": "0:00:13", + "end_time": "2018-05-12 12:21:21.958887+00:00", + "hyperdrive_id": "8402", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8402_b2533d98", + "metric": 3332.5151, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.765019942269395", + "created_time": "2018-05-12 12:31:33.508670+00:00", + "created_time_dt": "2018-05-12T12:31:33.508670", + "duration": "0:00:12", + "end_time": "2018-05-12 12:31:46.245844+00:00", + "hyperdrive_id": "8422", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8422_a004deac", + "metric": 3333.7701, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.772240783940475", + "created_time": "2018-05-12 13:11:46.536678+00:00", + "created_time_dt": "2018-05-12T13:11:46.536678", + "duration": "0:00:13", + "end_time": "2018-05-12 13:12:00.351416+00:00", + "hyperdrive_id": "8478", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8478_f9b60072", + "metric": 3335.0403, + "start_time": "None", + "status": "Completed" + }, + { + 
"arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.775376593821348", + "created_time": "2018-05-12 11:57:51.483491+00:00", + "created_time_dt": "2018-05-12T11:57:51.483491", + "duration": "0:00:49", + "end_time": "2018-05-12 11:58:41.345056+00:00", + "hyperdrive_id": "8336", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8336_8d9fcbf2", + "metric": 3335.5954, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.776902840582832", + "created_time": "2018-05-12 13:16:42.209279+00:00", + "created_time_dt": "2018-05-12T13:16:42.209279", + "duration": "0:00:12", + "end_time": "2018-05-12 13:16:54.766193+00:00", + "hyperdrive_id": "8483", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8483_a61bd458", + "metric": 3335.8663, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.787404580220175", + "created_time": "2018-05-12 13:44:16.718411+00:00", + "created_time_dt": "2018-05-12T13:44:16.718411", + "duration": "0:00:41", + "end_time": "2018-05-12 13:44:58.084345+00:00", + "hyperdrive_id": "8513", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8513_9bff58a2", + "metric": 3337.7437, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.789523827771144", + "created_time": "2018-05-12 13:20:06.458660+00:00", + "created_time_dt": "2018-05-12T13:20:06.458660", + "duration": "0:00:14", + "end_time": "2018-05-12 13:20:21.078795+00:00", + "hyperdrive_id": "8487", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8487_ff1fa053", + "metric": 3338.1253, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.802056733580286", + "created_time": "2018-05-12 11:58:06.769725+00:00", + "created_time_dt": "2018-05-12T11:58:06.769725", + "duration": "0:00:17", + "end_time": "2018-05-12 11:58:23.830337+00:00", + 
"hyperdrive_id": "8337", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8337_27db767d", + "metric": 3340.4001, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.158706098065124", + "created_time": "2018-05-12 13:07:59.714140+00:00", + "created_time_dt": "2018-05-12T13:07:59.714140", + "duration": "0:00:12", + "end_time": "2018-05-12 13:08:12.356448+00:00", + "hyperdrive_id": "8471", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8471_4684fafc", + "metric": 3341.1883, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.81666319772656", + "created_time": "2018-05-12 12:57:35.142880+00:00", + "created_time_dt": "2018-05-12T12:57:35.142880", + "duration": "0:00:29", + "end_time": "2018-05-12 12:58:05.114772+00:00", + "hyperdrive_id": "8460", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8460_a5bfa8fb", + "metric": 3343.0888, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.153939964737971", + "created_time": "2018-05-12 12:26:01.040538+00:00", + "created_time_dt": "2018-05-12T12:26:01.040538", + "duration": "0:00:13", + "end_time": "2018-05-12 12:26:14.946367+00:00", + "hyperdrive_id": "8412", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8412_f38b79e1", + "metric": 3343.3287, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.818247357781626", + "created_time": "2018-05-12 12:51:32.713599+00:00", + "created_time_dt": "2018-05-12T12:51:32.713599", + "duration": "0:00:28", + "end_time": "2018-05-12 12:52:01.253396+00:00", + "hyperdrive_id": "8451", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8451_02f74578", + "metric": 3343.3827, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.819893819434544", + 
"created_time": "2018-05-12 11:58:31.362582+00:00", + "created_time_dt": "2018-05-12T11:58:31.362582", + "duration": "0:00:43", + "end_time": "2018-05-12 11:59:15.122246+00:00", + "hyperdrive_id": "8338", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8338_1d2f97a7", + "metric": 3343.6887, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.152741652399469", + "created_time": "2018-05-12 12:16:36.776970+00:00", + "created_time_dt": "2018-05-12T12:16:36.776970", + "duration": "0:00:12", + "end_time": "2018-05-12 12:16:49.408701+00:00", + "hyperdrive_id": "8391", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8391_4fee6774", + "metric": 3343.8776, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.821590167168393", + "created_time": "2018-05-12 13:27:06.766331+00:00", + "created_time_dt": "2018-05-12T13:27:06.766331", + "duration": "0:00:13", + "end_time": "2018-05-12 13:27:20.387070+00:00", + "hyperdrive_id": "8495", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8495_d4f07494", + "metric": 3344.0045, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.15225619855891", + "created_time": "2018-05-12 13:39:56.625205+00:00", + "created_time_dt": "2018-05-12T13:39:56.625205", + "duration": "0:00:13", + "end_time": "2018-05-12 13:40:10.343274+00:00", + "hyperdrive_id": "8509", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8509_0ced32a8", + "metric": 3344.1012, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.826290872584932", + "created_time": "2018-05-12 12:48:28.934533+00:00", + "created_time_dt": "2018-05-12T12:48:28.934533", + "duration": "0:00:13", + "end_time": "2018-05-12 12:48:41.996203+00:00", + "hyperdrive_id": "8447", + "id": 
"hyperdrive-sklearn-diabetes_1526126138942_1056_8447_cc33a2e2", + "metric": 3344.8821, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.150287304842608", + "created_time": "2018-05-12 12:20:11.826010+00:00", + "created_time_dt": "2018-05-12T12:20:11.826010", + "duration": "0:00:14", + "end_time": "2018-05-12 12:20:26.700669+00:00", + "hyperdrive_id": "8400", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8400_fb9de45f", + "metric": 3345.0153, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.827917639441571", + "created_time": "2018-05-12 13:30:45.754478+00:00", + "created_time_dt": "2018-05-12T13:30:45.754478", + "duration": "0:00:17", + "end_time": "2018-05-12 13:31:03.690041+00:00", + "hyperdrive_id": "8499", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8499_5da756fc", + "metric": 3345.1866, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.831400494081907", + "created_time": "2018-05-12 13:25:29.760666+00:00", + "created_time_dt": "2018-05-12T13:25:29.760666", + "duration": "0:00:28", + "end_time": "2018-05-12 13:25:58.618049+00:00", + "hyperdrive_id": "8493", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8493_1067756d", + "metric": 3345.8403, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.143633113372336", + "created_time": "2018-05-12 12:15:41.187737+00:00", + "created_time_dt": "2018-05-12T12:15:41.187737", + "duration": "0:00:13", + "end_time": "2018-05-12 12:15:54.635649+00:00", + "hyperdrive_id": "8390", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8390_6bb4c33e", + "metric": 3348.1929, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.843864112354753", + "created_time": "2018-05-12 
13:11:46.478245+00:00", + "created_time_dt": "2018-05-12T13:11:46.478245", + "duration": "0:00:30", + "end_time": "2018-05-12 13:12:16.864318+00:00", + "hyperdrive_id": "8476", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8476_7d515d12", + "metric": 3348.1958, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.844834242372722", + "created_time": "2018-05-12 13:34:34.862819+00:00", + "created_time_dt": "2018-05-12T13:34:34.862819", + "duration": "0:00:18", + "end_time": "2018-05-12 13:34:53.000220+00:00", + "hyperdrive_id": "8503", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8503_22cc2459", + "metric": 3348.3802, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.847189528199022", + "created_time": "2018-05-12 12:05:30.243006+00:00", + "created_time_dt": "2018-05-12T12:05:30.243006", + "duration": "0:00:27", + "end_time": "2018-05-12 12:05:57.958523+00:00", + "hyperdrive_id": "8362", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8362_507ea67e", + "metric": 3348.8285, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.847412052270933", + "created_time": "2018-05-12 13:52:12.711262+00:00", + "created_time_dt": "2018-05-12T13:52:12.711262", + "duration": "0:00:29", + "end_time": "2018-05-12 13:52:41.855615+00:00", + "hyperdrive_id": "8522", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8522_49d2012a", + "metric": 3348.8709, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.849158168297161", + "created_time": "2018-05-12 12:35:11.088836+00:00", + "created_time_dt": "2018-05-12T12:35:11.088836", + "duration": "0:00:29", + "end_time": "2018-05-12 12:35:40.836866+00:00", + "hyperdrive_id": "8427", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8427_c559cbd5", + 
"metric": 3349.2039, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.858392736114089", + "created_time": "2018-05-12 12:10:27.136482+00:00", + "created_time_dt": "2018-05-12T12:10:27.136482", + "duration": "0:00:13", + "end_time": "2018-05-12 12:10:40.337157+00:00", + "hyperdrive_id": "8376", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8376_eae11b68", + "metric": 3350.9728, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.858806435401327", + "created_time": "2018-05-12 12:18:21.437029+00:00", + "created_time_dt": "2018-05-12T12:18:21.437029", + "duration": "0:00:15", + "end_time": "2018-05-12 12:18:37.238879+00:00", + "hyperdrive_id": "8395", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8395_7ad8e7cc", + "metric": 3351.0523, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.870882618277203", + "created_time": "2018-05-12 12:54:31.035593+00:00", + "created_time_dt": "2018-05-12T12:54:31.035593", + "duration": "0:00:13", + "end_time": "2018-05-12 12:54:44.090425+00:00", + "hyperdrive_id": "8456", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8456_6948cbcc", + "metric": 3353.3854, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.870957972314387", + "created_time": "2018-05-12 12:03:37.974468+00:00", + "created_time_dt": "2018-05-12T12:03:37.974468", + "duration": "0:00:54", + "end_time": "2018-05-12 12:04:32.645363+00:00", + "hyperdrive_id": "8356", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8356_c89bedd1", + "metric": 3353.4, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.874263482882214", + "created_time": "2018-05-12 13:44:12.039777+00:00", + "created_time_dt": "2018-05-12T13:44:12.039777", + 
"duration": "0:00:28", + "end_time": "2018-05-12 13:44:40.898814+00:00", + "hyperdrive_id": "8512", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8512_c8b83bf0", + "metric": 3354.0423, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.874529541822094", + "created_time": "2018-05-12 13:39:56.277448+00:00", + "created_time_dt": "2018-05-12T13:39:56.277448", + "duration": "0:00:29", + "end_time": "2018-05-12 13:40:25.570608+00:00", + "hyperdrive_id": "8507", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8507_1cd72ece", + "metric": 3354.094, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.875032535947787", + "created_time": "2018-05-12 12:03:09.088430+00:00", + "created_time_dt": "2018-05-12T12:03:09.088430", + "duration": "0:00:14", + "end_time": "2018-05-12 12:03:23.413857+00:00", + "hyperdrive_id": "8354", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8354_ba9b01e8", + "metric": 3354.1919, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.131482857415543", + "created_time": "2018-05-12 12:00:40.831510+00:00", + "created_time_dt": "2018-05-12T12:00:40.831510", + "duration": "0:00:13", + "end_time": "2018-05-12 12:00:54.473484+00:00", + "hyperdrive_id": "8345", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8345_e90e298a", + "metric": 3354.3567, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.876760429991959", + "created_time": "2018-05-12 11:56:29.419120+00:00", + "created_time_dt": "2018-05-12T11:56:29.419120", + "duration": "0:01:00", + "end_time": "2018-05-12 11:57:29.880644+00:00", + "hyperdrive_id": "8331", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8331_23196033", + "metric": 3354.5285, + "start_time": "None", + "status": "Completed" + }, + { + 
"arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.879527886321761", + "created_time": "2018-05-12 12:48:29.201551+00:00", + "created_time_dt": "2018-05-12T12:48:29.201551", + "duration": "0:00:28", + "end_time": "2018-05-12 12:48:57.994549+00:00", + "hyperdrive_id": "8448", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8448_0adee24d", + "metric": 3355.0683, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.882185892591819", + "created_time": "2018-05-12 12:36:28.013382+00:00", + "created_time_dt": "2018-05-12T12:36:28.013382", + "duration": "0:00:12", + "end_time": "2018-05-12 12:36:40.470870+00:00", + "hyperdrive_id": "8429", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8429_38aa4de9", + "metric": 3355.5878, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.898540073965212", + "created_time": "2018-05-12 13:52:12.001244+00:00", + "created_time_dt": "2018-05-12T13:52:12.001244", + "duration": "0:01:56", + "end_time": "2018-05-12 13:54:08.239396+00:00", + "hyperdrive_id": "8521", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8521_bdd8810c", + "metric": 3358.8047, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.898790550820804", + "created_time": "2018-05-12 12:45:34.119530+00:00", + "created_time_dt": "2018-05-12T12:45:34.119530", + "duration": "0:00:29", + "end_time": "2018-05-12 12:46:03.683003+00:00", + "hyperdrive_id": "8443", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8443_6ecf5b63", + "metric": 3358.8543, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.123019805934679", + "created_time": "2018-05-12 13:18:18.097594+00:00", + "created_time_dt": "2018-05-12T13:18:18.097594", + "duration": "0:00:48", + "end_time": "2018-05-12 13:19:06.174482+00:00", + 
"hyperdrive_id": "8484", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8484_90ffa59e", + "metric": 3358.9367, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.909619395012488", + "created_time": "2018-05-12 12:04:37.269556+00:00", + "created_time_dt": "2018-05-12T12:04:37.269556", + "duration": "0:00:13", + "end_time": "2018-05-12 12:04:50.677702+00:00", + "hyperdrive_id": "8359", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8359_bec7b5c7", + "metric": 3361.0033, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.116537281119354", + "created_time": "2018-05-12 12:25:05.224578+00:00", + "created_time_dt": "2018-05-12T12:25:05.224578", + "duration": "0:00:13", + "end_time": "2018-05-12 12:25:19.188493+00:00", + "hyperdrive_id": "8410", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8410_481b4e19", + "metric": 3362.6096, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.922247917060456", + "created_time": "2018-05-12 12:55:58.393210+00:00", + "created_time_dt": "2018-05-12T12:55:58.393210", + "duration": "0:00:28", + "end_time": "2018-05-12 12:56:27.202928+00:00", + "hyperdrive_id": "8458", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8458_118e67f8", + "metric": 3363.527, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.924207993821612", + "created_time": "2018-05-12 13:07:59.698584+00:00", + "created_time_dt": "2018-05-12T13:07:59.698584", + "duration": "0:00:28", + "end_time": "2018-05-12 13:08:28.412662+00:00", + "hyperdrive_id": "8473", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8473_92f27331", + "metric": 3363.9203, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.9281605775003", + 
"created_time": "2018-05-12 12:33:55.035789+00:00", + "created_time_dt": "2018-05-12T12:33:55.035789", + "duration": "0:00:13", + "end_time": "2018-05-12 12:34:08.214661+00:00", + "hyperdrive_id": "8425", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8425_1500a0dd", + "metric": 3364.7148, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.928925041400716", + "created_time": "2018-05-12 12:03:37.850247+00:00", + "created_time_dt": "2018-05-12T12:03:37.850247", + "duration": "0:00:19", + "end_time": "2018-05-12 12:03:57.704131+00:00", + "hyperdrive_id": "8355", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8355_42f510c9", + "metric": 3364.8687, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.929577236031663", + "created_time": "2018-05-12 13:21:55.037697+00:00", + "created_time_dt": "2018-05-12T13:21:55.037697", + "duration": "0:00:48", + "end_time": "2018-05-12 13:22:43.185540+00:00", + "hyperdrive_id": "8490", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8490_5ea3cd3f", + "metric": 3365, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.112224359908429", + "created_time": "2018-05-12 11:56:42.923129+00:00", + "created_time_dt": "2018-05-12T11:56:42.923129", + "duration": "0:00:14", + "end_time": "2018-05-12 11:56:57.464771+00:00", + "hyperdrive_id": "8332", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8332_5219ec61", + "metric": 3365.1342, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.93757819924753", + "created_time": "2018-05-12 12:28:12.472109+00:00", + "created_time_dt": "2018-05-12T12:28:12.472109", + "duration": "0:00:13", + "end_time": "2018-05-12 12:28:25.911571+00:00", + "hyperdrive_id": "8416", + "id": 
"hyperdrive-sklearn-diabetes_1526126138942_1056_8416_95953de1", + "metric": 3366.6147, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.940420194928342", + "created_time": "2018-05-12 12:22:03.666129+00:00", + "created_time_dt": "2018-05-12T12:22:03.666129", + "duration": "0:00:12", + "end_time": "2018-05-12 12:22:16.335802+00:00", + "hyperdrive_id": "8404", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8404_d0736441", + "metric": 3367.1899, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.943042536771435", + "created_time": "2018-05-12 13:54:15.260969+00:00", + "created_time_dt": "2018-05-12T13:54:15.260969", + "duration": "0:00:11", + "end_time": "2018-05-12 13:54:26.918336+00:00", + "hyperdrive_id": "8523", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8523_76882d61", + "metric": 3367.7213, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.106668849133971", + "created_time": "2018-05-12 12:09:52.702880+00:00", + "created_time_dt": "2018-05-12T12:09:52.702880", + "duration": "0:00:25", + "end_time": "2018-05-12 12:10:18.241796+00:00", + "hyperdrive_id": "8374", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8374_9e11481e", + "metric": 3368.4832, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.954067716722258", + "created_time": "2018-05-12 12:30:24.308130+00:00", + "created_time_dt": "2018-05-12T12:30:24.308130", + "duration": "0:00:16", + "end_time": "2018-05-12 12:30:40.608925+00:00", + "hyperdrive_id": "8419", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8419_1b2ded51", + "metric": 3369.9633, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.104027755017491", + "created_time": "2018-05-12 
12:20:11.870461+00:00", + "created_time_dt": "2018-05-12T12:20:11.870461", + "duration": "0:00:14", + "end_time": "2018-05-12 12:20:26.823381+00:00", + "hyperdrive_id": "8399", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8399_70048e50", + "metric": 3370.114, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.104023113674115", + "created_time": "2018-05-12 12:07:53.084399+00:00", + "created_time_dt": "2018-05-12T12:07:53.084399", + "duration": "0:00:13", + "end_time": "2018-05-12 12:08:06.237712+00:00", + "hyperdrive_id": "8369", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8369_993e0dd7", + "metric": 3370.1169, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.955790693896138", + "created_time": "2018-05-12 12:47:02.252823+00:00", + "created_time_dt": "2018-05-12T12:47:02.252823", + "duration": "0:00:30", + "end_time": "2018-05-12 12:47:32.329335+00:00", + "hyperdrive_id": "8445", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8445_c70f8879", + "metric": 3370.3148, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.957619945976621", + "created_time": "2018-05-12 12:14:10.784220+00:00", + "created_time_dt": "2018-05-12T12:14:10.784220", + "duration": "0:00:13", + "end_time": "2018-05-12 12:14:23.785111+00:00", + "hyperdrive_id": "8385", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8385_8b801e22", + "metric": 3370.6882, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.958613790873744", + "created_time": "2018-05-12 12:21:08.499878+00:00", + "created_time_dt": "2018-05-12T12:21:08.499878", + "duration": "0:00:12", + "end_time": "2018-05-12 12:21:21.109495+00:00", + "hyperdrive_id": "8401", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8401_b84ee061", + "metric": 
3370.8913, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0991051795076507", + "created_time": "2018-05-12 13:15:04.980450+00:00", + "created_time_dt": "2018-05-12T13:15:04.980450", + "duration": "0:00:13", + "end_time": "2018-05-12 13:15:18.786874+00:00", + "hyperdrive_id": "8480", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8480_614cf569", + "metric": 3373.221, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.970852980638722", + "created_time": "2018-05-12 12:36:27.820576+00:00", + "created_time_dt": "2018-05-12T12:36:27.820576", + "duration": "0:00:29", + "end_time": "2018-05-12 12:36:57.322649+00:00", + "hyperdrive_id": "8430", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8430_b2673e0c", + "metric": 3373.3991, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.979767975474247", + "created_time": "2018-05-12 13:07:59.532196+00:00", + "created_time_dt": "2018-05-12T13:07:59.532196", + "duration": "0:01:02", + "end_time": "2018-05-12 13:09:02.075600+00:00", + "hyperdrive_id": "8474", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8474_fcd51b10", + "metric": 3375.2343, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.984154480758112", + "created_time": "2018-05-12 12:14:55.708218+00:00", + "created_time_dt": "2018-05-12T12:14:55.708218", + "duration": "0:00:13", + "end_time": "2018-05-12 12:15:09.126182+00:00", + "hyperdrive_id": "8388", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8388_c4f98fe4", + "metric": 3376.1398, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.987309642340916", + "created_time": "2018-05-12 12:02:10.550056+00:00", + "created_time_dt": "2018-05-12T12:02:10.550056", + 
"duration": "0:00:39", + "end_time": "2018-05-12 12:02:50.233034+00:00", + "hyperdrive_id": "8350", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8350_73634488", + "metric": 3376.7921, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0726051008474307", + "created_time": "2018-05-12 13:36:21.997998+00:00", + "created_time_dt": "2018-05-12T13:36:21.997998", + "duration": "0:00:17", + "end_time": "2018-05-12 13:36:39.458975+00:00", + "hyperdrive_id": "8506", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8506_97095faa", + "metric": 3391.4684, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0704611645652615", + "created_time": "2018-05-12 12:37:43.597084+00:00", + "created_time_dt": "2018-05-12T12:37:43.597084", + "duration": "0:00:13", + "end_time": "2018-05-12 12:37:56.959322+00:00", + "hyperdrive_id": "8431", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8431_c751fb54", + "metric": 3393.0553, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0473324391576943", + "created_time": "2018-05-12 13:40:01.568786+00:00", + "created_time_dt": "2018-05-12T13:40:01.568786", + "duration": "0:00:41", + "end_time": "2018-05-12 13:40:43.404157+00:00", + "hyperdrive_id": "8508", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8508_8b1d3c4f", + "metric": 3411.0568, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0423407988138264", + "created_time": "2018-05-12 12:58:56.644162+00:00", + "created_time_dt": "2018-05-12T12:58:56.644162", + "duration": "0:00:31", + "end_time": "2018-05-12 12:59:28.208493+00:00", + "hyperdrive_id": "8462", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8462_296fe088", + "metric": 3415.083, + "start_time": "None", + "status": "Completed" + }, + { + 
"arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0356284101972079", + "created_time": "2018-05-12 13:29:08.960435+00:00", + "created_time_dt": "2018-05-12T13:29:08.960435", + "duration": "0:00:25", + "end_time": "2018-05-12 13:29:34.834119+00:00", + "hyperdrive_id": "8496", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8496_37c0fdc0", + "metric": 3420.4809, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0348149292173761", + "created_time": "2018-05-12 12:47:02.035005+00:00", + "created_time_dt": "2018-05-12T12:47:02.035005", + "duration": "0:00:13", + "end_time": "2018-05-12 12:47:15.727381+00:00", + "hyperdrive_id": "8446", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8446_8bd39a10", + "metric": 3421.1286, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.00351793616431617", + "created_time": "2018-05-12 13:56:22.665906+00:00", + "created_time_dt": "2018-05-12T13:56:22.665906", + "duration": "0:00:44", + "end_time": "2018-05-12 13:57:07.550427+00:00", + "hyperdrive_id": "8525", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8525_e1268c9b", + "metric": 3432.3808, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0127022983869274", + "created_time": "2018-05-12 12:18:21.730539+00:00", + "created_time_dt": "2018-05-12T12:18:21.730539", + "duration": "0:00:13", + "end_time": "2018-05-12 12:18:35.552439+00:00", + "hyperdrive_id": "8396", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8396_836f4190", + "metric": 3435.4146, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0109599437801077", + "created_time": "2018-05-12 12:15:41.253563+00:00", + "created_time_dt": "2018-05-12T12:15:41.253563", + "duration": "0:00:14", + "end_time": "2018-05-12 12:15:55.261054+00:00", 
+ "hyperdrive_id": "8389", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8389_3429880c", + "metric": 3435.7702, + "start_time": "None", + "status": "Completed" + } + ], + { + "categories": [ + "8327", + "8328", + "8329", + "8330", + "8331", + "8332", + "8333", + "8334", + "8335", + "8336", + "8337", + "8338", + "8339", + "8340", + "8341", + "8342", + "8343", + "8344", + "8345", + "8346", + "8347", + "8348", + "8349", + "8350", + "8351", + "8352", + "8353", + "8354", + "8355", + "8356", + "8357", + "8358", + "8359", + "8360", + "8361", + "8362", + "8363", + "8364", + "8365", + "8366", + "8367", + "8368", + "8369", + "8370", + "8371", + "8372", + "8373", + "8374", + "8375", + "8376", + "8377", + "8378", + "8379", + "8380", + "8381", + "8382", + "8383", + "8384", + "8385", + "8386", + "8387", + "8388", + "8389", + "8390", + "8391", + "8392", + "8393", + "8394", + "8395", + "8396", + "8397", + "8398", + "8399", + "8400", + "8401", + "8402", + "8403", + "8404", + "8405", + "8406", + "8407", + "8408", + "8409", + "8410", + "8411", + "8412", + "8413", + "8414", + "8415", + "8416", + "8417", + "8418", + "8419", + "8420", + "8421", + "8422", + "8423", + "8424", + "8425", + "8426", + "8427", + "8428", + "8429", + "8430", + "8431", + "8432", + "8433", + "8434", + "8435", + "8436", + "8437", + "8438", + "8439", + "8440", + "8441", + "8442", + "8443", + "8444", + "8445", + "8446", + "8447", + "8448", + "8449", + "8450", + "8451", + "8452", + "8453", + "8454", + "8455", + "8456", + "8457", + "8458", + "8459", + "8460", + "8461", + "8462", + "8463", + "8464", + "8465", + "8466", + "8467", + "8468", + "8469", + "8470", + "8471", + "8472", + "8473", + "8474", + "8475", + "8476", + "8477", + "8478", + "8479", + "8480", + "8481", + "8482", + "8483", + "8484", + "8485", + "8486", + "8487", + "8488", + "8489", + "8490", + "8491", + "8492", + "8493", + "8494", + "8495", + "8496", + "8497", + "8498", + "8499", + "8500", + "8501", + "8502", + "8503", + "8504", + "8505", + 
"8506", + "8507", + "8508", + "8509", + "8510", + "8511", + "8512", + "8513", + "8514", + "8515", + "8516", + "8517", + "8518", + "8519", + "8520", + "8521", + "8522", + "8523", + "8524", + "8525", + "8526" + ], + "metricName": "mse", + "series": [ + { + "mode": "markers", + "name": "mse", + "stepped": false, + "type": "scatter", + "uid": "836252", + "x": [ + "8327", + "8328", + "8329", + "8330", + "8331", + "8332", + "8333", + "8334", + "8335", + "8336", + "8337", + "8338", + "8339", + "8340", + "8341", + "8342", + "8343", + "8344", + "8345", + "8346", + "8347", + "8348", + "8349", + "8350", + "8351", + "8352", + "8353", + "8354", + "8355", + "8356", + "8357", + "8358", + "8359", + "8360", + "8361", + "8362", + "8363", + "8364", + "8365", + "8366", + "8367", + "8368", + "8369", + "8370", + "8371", + "8372", + "8373", + "8374", + "8375", + "8376", + "8377", + "8378", + "8379", + "8380", + "8381", + "8382", + "8383", + "8384", + "8385", + "8386", + "8387", + "8388", + "8389", + "8390", + "8391", + "8392", + "8393", + "8394", + "8395", + "8396", + "8397", + "8398", + "8399", + "8400", + "8401", + "8402", + "8403", + "8404", + "8405", + "8406", + "8407", + "8408", + "8409", + "8410", + "8411", + "8412", + "8413", + "8414", + "8415", + "8416", + "8417", + "8418", + "8419", + "8420", + "8421", + "8422", + "8423", + "8424", + "8425", + "8426", + "8427", + "8428", + "8429", + "8430", + "8431", + "8432", + "8433", + "8434", + "8435", + "8436", + "8437", + "8438", + "8439", + "8440", + "8441", + "8442", + "8443", + "8444", + "8445", + "8446", + "8447", + "8448", + "8449", + "8450", + "8451", + "8452", + "8453", + "8454", + "8455", + "8456", + "8457", + "8458", + "8459", + "8460", + "8461", + "8462", + "8463", + "8464", + "8465", + "8466", + "8467", + "8468", + "8469", + "8470", + "8471", + "8472", + "8473", + "8474", + "8475", + "8476", + "8477", + "8478", + "8479", + "8480", + "8481", + "8482", + "8483", + "8484", + "8485", + "8486", + "8487", + "8488", + "8489", + "8490", 
+ "8491", + "8492", + "8493", + "8494", + "8495", + "8496", + "8497", + "8498", + "8499", + "8500", + "8501", + "8502", + "8503", + "8504", + "8505", + "8506", + "8507", + "8508", + "8509", + "8510", + "8511", + "8512", + "8513", + "8514", + "8515", + "8516", + "8517", + "8518", + "8519", + "8520", + "8521", + "8522", + "8523", + "8524", + "8525", + "8526" + ] + }, + { + "line": { + "shape": "hv" + }, + "mode": "lines", + "name": "mse_min", + "stepped": true, + "type": "scatter", + "uid": "aa1605", + "x": [ + "8327", + "8328", + "8329", + "8330", + "8331", + "8332", + "8333", + "8334", + "8335", + "8336", + "8337", + "8338", + "8339", + "8340", + "8341", + "8342", + "8343", + "8344", + "8345", + "8346", + "8347", + "8348", + "8349", + "8350", + "8351", + "8352", + "8353", + "8354", + "8355", + "8356", + "8357", + "8358", + "8359", + "8360", + "8361", + "8362", + "8363", + "8364", + "8365", + "8366", + "8367", + "8368", + "8369", + "8370", + "8371", + "8372", + "8373", + "8374", + "8375", + "8376", + "8377", + "8378", + "8379", + "8380", + "8381", + "8382", + "8383", + "8384", + "8385", + "8386", + "8387", + "8388", + "8389", + "8390", + "8391", + "8392", + "8393", + "8394", + "8395", + "8396", + "8397", + "8398", + "8399", + "8400", + "8401", + "8402", + "8403", + "8404", + "8405", + "8406", + "8407", + "8408", + "8409", + "8410", + "8411", + "8412", + "8413", + "8414", + "8415", + "8416", + "8417", + "8418", + "8419", + "8420", + "8421", + "8422", + "8423", + "8424", + "8425", + "8426", + "8427", + "8428", + "8429", + "8430", + "8431", + "8432", + "8433", + "8434", + "8435", + "8436", + "8437", + "8438", + "8439", + "8440", + "8441", + "8442", + "8443", + "8444", + "8445", + "8446", + "8447", + "8448", + "8449", + "8450", + "8451", + "8452", + "8453", + "8454", + "8455", + "8456", + "8457", + "8458", + "8459", + "8460", + "8461", + "8462", + "8463", + "8464", + "8465", + "8466", + "8467", + "8468", + "8469", + "8470", + "8471", + "8472", + "8473", + "8474", + 
"8475", + "8476", + "8477", + "8478", + "8479", + "8480", + "8481", + "8482", + "8483", + "8484", + "8485", + "8486", + "8487", + "8488", + "8489", + "8490", + "8491", + "8492", + "8493", + "8494", + "8495", + "8496", + "8497", + "8498", + "8499", + "8500", + "8501", + "8502", + "8503", + "8504", + "8505", + "8506", + "8507", + "8508", + "8509", + "8510", + "8511", + "8512", + "8513", + "8514", + "8515", + "8516", + "8517", + "8518", + "8519", + "8520", + "8521", + "8522", + "8523", + "8524", + "8525", + "8526" + ] + } + ], + "showLegend": false, + "title": "HyperDrive Run Primary Metric : mse" + } + ] + } + }, + "f5ccd42f25e8402bbcccf511b3c6e08f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_train_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowHyperDriveRunsView", + "layout": "IPY_MODEL_83e0767b6c3a41a2833d0f8fcf690c72", + "value": [ + { + "run_id": "hyperdrive-sklearn-diabetes_1526099364301", + "status": "Running", + "workbench_run_details_uri": "https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-e2e-rg%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2euapws/projects/hyperdrive-sklearn-diabetes/run-history/run-details/hyperdrive-sklearn-diabetes_1526099364301?type=HyperDrive" + }, + [ + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.393891676993532", + "created_time": "2018-05-12 04:34:15.248693+00:00", + "created_time_dt": "2018-05-12T04:34:15.248693", + "duration": "0:00:19", + "end_time": "2018-05-12 04:34:34.352899+00:00", + "hyperdrive_id": "8324", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8324_3d18af85", + "metric": 3295.8309, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.375901006177515", + "created_time": 
"2018-05-12 04:33:51.986890+00:00", + "created_time_dt": "2018-05-12T04:33:51.986890", + "duration": "0:00:25", + "end_time": "2018-05-12 04:34:17.199718+00:00", + "hyperdrive_id": "8323", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8323_ef8a489b", + "metric": 3296.3202, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.358260413565936", + "created_time": "2018-05-12 04:29:36.537978+00:00", + "created_time_dt": "2018-05-12T04:29:36.537978", + "duration": "0:00:30", + "end_time": "2018-05-12 04:30:07.443012+00:00", + "hyperdrive_id": "8307", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8307_6bb93d21", + "metric": 3297.1463, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.505489715550404", + "created_time": "2018-05-12 04:33:25.471155+00:00", + "created_time_dt": "2018-05-12T04:33:25.471155", + "duration": "0:00:33", + "end_time": "2018-05-12 04:33:59.378295+00:00", + "hyperdrive_id": "8322", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8322_5151e960", + "metric": 3299.3001, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.513294267924078", + "created_time": "2018-05-12 04:31:23.890814+00:00", + "created_time_dt": "2018-05-12T04:31:23.890814", + "duration": "0:00:38", + "end_time": "2018-05-12 04:32:02.172094+00:00", + "hyperdrive_id": "8315", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8315_a8d0df53", + "metric": 3299.8883, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.320051010308542", + "created_time": "2018-05-12 04:32:09.433779+00:00", + "created_time_dt": "2018-05-12T04:32:09.433779", + "duration": "0:00:43", + "end_time": "2018-05-12 04:32:53.026804+00:00", + "hyperdrive_id": "8318", + "id": 
"hyperdrive-sklearn-diabetes_1526099364301_1055_8318_9c385d92", + "metric": 3300.2455, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.658960235847498", + "created_time": "2018-05-12 04:32:47.239418+00:00", + "created_time_dt": "2018-05-12T04:32:47.239418", + "duration": "0:00:55", + "end_time": "2018-05-12 04:33:43.185284+00:00", + "hyperdrive_id": "8320", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8320_2e273b36", + "metric": 3316.6481, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.222378138720275", + "created_time": "2018-05-12 04:31:50.981871+00:00", + "created_time_dt": "2018-05-12T04:31:50.981871", + "duration": "0:00:28", + "end_time": "2018-05-12 04:32:19.478985+00:00", + "hyperdrive_id": "8317", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8317_7551a3d3", + "metric": 3318.4718, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.196888642456712", + "created_time": "2018-05-12 04:31:05.402230+00:00", + "created_time_dt": "2018-05-12T04:31:05.402230", + "duration": "0:00:23", + "end_time": "2018-05-12 04:31:29.205264+00:00", + "hyperdrive_id": "8314", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8314_24c262a7", + "metric": 3326.3371, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.189096447548258", + "created_time": "2018-05-12 04:34:39.460591+00:00", + "created_time_dt": "2018-05-12T04:34:39.460591", + "duration": "0:00:29", + "end_time": "2018-05-12 04:35:08.900171+00:00", + "hyperdrive_id": "8326", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8326_9e14bae2", + "metric": 3329.0536, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.150066832506793", + "created_time": "2018-05-12 
04:30:12.926261+00:00", + "created_time_dt": "2018-05-12T04:30:12.926261", + "duration": "0:00:42", + "end_time": "2018-05-12 04:30:55.921563+00:00", + "hyperdrive_id": "8311", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8311_eaaf9dd9", + "metric": 3345.1184, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.853877240746265", + "created_time": "2018-05-12 04:34:15.379264+00:00", + "created_time_dt": "2018-05-12T04:34:15.379264", + "duration": "0:00:36", + "end_time": "2018-05-12 04:34:51.753259+00:00", + "hyperdrive_id": "8325", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8325_76cf6d75", + "metric": 3350.1062, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.864495102907021", + "created_time": "2018-05-12 04:29:36.618937+00:00", + "created_time_dt": "2018-05-12T04:29:36.618937", + "duration": "0:00:47", + "end_time": "2018-05-12 04:30:23.776039+00:00", + "hyperdrive_id": "8309", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8309_950b505d", + "metric": 3352.1487, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.102729222141871", + "created_time": "2018-05-12 04:29:36.417036+00:00", + "created_time_dt": "2018-05-12T04:29:36.417036", + "duration": "0:00:13", + "end_time": "2018-05-12 04:29:50.008716+00:00", + "hyperdrive_id": "8308", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8308_9e19788b", + "metric": 3370.925, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.98813448350046", + "created_time": "2018-05-12 04:29:36.482277+00:00", + "created_time_dt": "2018-05-12T04:29:36.482277", + "duration": "0:01:02", + "end_time": "2018-05-12 04:30:39.202800+00:00", + "hyperdrive_id": "8310", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8310_b7a0d869", + "metric": 
3376.9628, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.992686382474524", + "created_time": "2018-05-12 04:32:23.342063+00:00", + "created_time_dt": "2018-05-12T04:32:23.342063", + "duration": "0:00:45", + "end_time": "2018-05-12 04:33:08.616066+00:00", + "hyperdrive_id": "8319", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8319_7d385753", + "metric": 3377.9056, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0720166631869791", + "created_time": "2018-05-12 04:33:00.471785+00:00", + "created_time_dt": "2018-05-12T04:33:00.471785", + "duration": "0:00:24", + "end_time": "2018-05-12 04:33:25.342289+00:00", + "hyperdrive_id": "8321", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8321_93092a4f", + "metric": 3391.9024, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0399458028478119", + "created_time": "2018-05-12 04:31:37.490004+00:00", + "created_time_dt": "2018-05-12T04:31:37.490004", + "duration": "0:00:58", + "end_time": "2018-05-12 04:32:36.227261+00:00", + "hyperdrive_id": "8316", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8316_47adbb4d", + "metric": 3417.0157, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0371855776131041", + "created_time": "2018-05-12 04:30:40.022059+00:00", + "created_time_dt": "2018-05-12T04:30:40.022059", + "duration": "0:00:31", + "end_time": "2018-05-12 04:31:11.265877+00:00", + "hyperdrive_id": "8313", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8313_a2cf1a87", + "metric": 3419.2355, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0108521659414031", + "created_time": "2018-05-12 04:30:26.126697+00:00", + "created_time_dt": "2018-05-12T04:30:26.126697", + 
"duration": "0:01:19", + "end_time": "2018-05-12 04:31:45.385829+00:00", + "hyperdrive_id": "8312", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8312_333b9d43", + "metric": 3435.7832, + "start_time": "None", + "status": "Completed" + } + ], + { + "categories": [ + "8307", + "8308", + "8309", + "8310", + "8311", + "8312", + "8313", + "8314", + "8315", + "8316", + "8317", + "8318", + "8319", + "8320", + "8321", + "8322", + "8323", + "8324", + "8325", + "8326" + ], + "metricName": "mse", + "series": [ + { + "mode": "markers", + "name": "mse", + "stepped": false, + "type": "scatter", + "uid": "0d668b", + "x": [ + "8307", + "8308", + "8309", + "8310", + "8311", + "8312", + "8313", + "8314", + "8315", + "8316", + "8317", + "8318", + "8319", + "8320", + "8321", + "8322", + "8323", + "8324", + "8325", + "8326" + ], + "y": [ + 3297.146322567479, + 3370.9250492845417, + 3352.1487032874497, + 3376.962795304554, + 3345.1183624558485, + 3435.783246565139, + 3419.235515804575, + 3326.3371118238074, + 3299.888294102396, + 3417.015692446415, + 3318.471799408107, + 3300.2455334502383, + 3377.9056290478743, + 3316.6480785229305, + 3391.902383224928, + 3299.300119043289, + 3296.320211066935, + 3295.8308612858723, + 3350.1062329850233, + 3329.0535888350505 + ] + }, + { + "line": { + "shape": "hv" + }, + "mode": "lines", + "name": "mse_min", + "stepped": true, + "type": "scatter", + "uid": "686b96", + "x": [ + "8307", + "8308", + "8309", + "8310", + "8311", + "8312", + "8313", + "8314", + "8315", + "8316", + "8317", + "8318", + "8319", + "8320", + "8321", + "8322", + "8323", + "8324", + "8325", + "8326" + ], + "y": [ + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3296.320211066935, + 
3295.8308612858723, + 3295.8308612858723, + 3295.8308612858723 + ] + } + ], + "showLegend": false, + "title": "HyperDrive Run Primary Metric : mse" + } + ] + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb b/01.getting-started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb similarity index 99% rename from 00.Getting Started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb rename to 01.getting-started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb index 5f0e460f6..beec8dc59 100644 --- a/00.Getting Started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb +++ b/01.getting-started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb @@ -422,7 +422,20 @@ "metadata": {}, "source": [ "### Find best run\n", - "Please wait till all Hyperdrive runs finish before running the below cells." + "Wait until all Hyperdrive runs finish before running the below cells." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" ] }, { diff --git a/00.Getting Started/07.hyperdrive-with-sklearn/diabetes_sklearn.py b/01.getting-started/07.hyperdrive-with-sklearn/diabetes_sklearn.py similarity index 100% rename from 00.Getting Started/07.hyperdrive-with-sklearn/diabetes_sklearn.py rename to 01.getting-started/07.hyperdrive-with-sklearn/diabetes_sklearn.py diff --git a/01.getting-started/08.hyperdrive-with-TensorFlow/.ipynb_checkpoints/08.hyperdrive-with-TensorFlow-checkpoint.ipynb b/01.getting-started/08.hyperdrive-with-TensorFlow/.ipynb_checkpoints/08.hyperdrive-with-TensorFlow-checkpoint.ipynb new file mode 100644 index 000000000..d749b57d4 --- /dev/null +++ b/01.getting-started/08.hyperdrive-with-TensorFlow/.ipynb_checkpoints/08.hyperdrive-with-TensorFlow-checkpoint.ipynb @@ -0,0 +1,3448 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training and Hyperparameter Tuning of a TensorFlow Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial, we demonstrate how to use the Azure ML Python SDK to train a Convolutional Neural Network (CNN) in TensorFlow to perform handwritten digit recognition on the popular MNIST dataset. We will demonstrate how to perform hyperparameter tuning of the model using AML's HyperDrive service. 
\n", + "\n", + "We will cover the following concepts:\n", + "* Create a Batch AI GPU cluster\n", + "* (To do): DataStore\n", + "* Train a TensorFlow model on a single node\n", + "* Logging metrics to Run History\n", + "* Set up a hyperparameter sweep with HyperDrive\n", + "* Select the best model for download" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't. In addition, to run through this notebook, you will need to install a few additional packages by running `pip install pillow tensorflow matplotlib pandas tqdm`\n", + "\n", + "### Authorize Hyperdrive Service Principal\n", + "\n", + "Hyperdrive service is in preview so you need to explicitly grant permissions. In Azure portal, add `vienna-test-westus` as a `Contributor` to your resource group. Or, you can also do this from azure-cli:\n", + "```sh\n", + "# find the ARM id of your resource group. Copy into memory.\n", + "$ az group show -n <your-resource-group-name> -o json\n", + "\n", + "# check if https://vienna-test-westus-cluster.sp.azureml.net is a Contributor.\n", + "$ az role assignment list --scope <your-resource-group-arm-id> -o table\n", + "\n", + "# if not, add it. you will need to be a resource group owner to do this.\n", + "$ az role assignment create --role Contributor --scope <your-resource-group-arm-id> --assignee https://vienna-test-westus-cluster.sp.azureml.net\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Set Up a Workspace\n", + "Workspace is the top-level Azure Resource for Azure ML services."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create An Experiment\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'hyperdrive-with-tf'\n", + "experiment = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a folder to store the training script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "script_folder = './samples/hyperdrive-with-tf'\n", + "os.makedirs(script_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Provision a New Batch AI Cluster\n", + "Training machine learning models is often a compute-intensive process. Azure's [Batch AI](#https://docs.microsoft.com/en-us/azure/batch-ai/overview) service allows data scientists to leverage the power of compute clusters of CPU or GPU-enabled VMs for training their models. Using the Python SDK, we can easily provision a Batch AI cluster with the specifications we want." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create mlc", + "batchai" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "# choose a name for your cluster\n", + "batchai_cluster_name = ws.name + \"gpu\"\n", + "\n", + "found = False\n", + "# see if this compute target already exists in the workspace\n", + "for ct in ws.compute_targets():\n", + " print(ct.name, ct.type)\n", + " if ct.name == batchai_cluster_name and type(ct) is BatchAiCompute:\n", + " found = True\n", + " print('found compute target. just use it.')\n", + " compute_target = ct\n", + " break\n", + " \n", + "if not found:\n", + " print('creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\", # NC6 is GPU-enabled\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = True,\n", + " cluster_min_nodes = 1, \n", + " cluster_max_nodes = 4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n", + " \n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + " \n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + " print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we specify the following parameters for the `provisioning_config`:\n", + "* `vm_size`: the family and size of the VM to use. 
For this tutorial we want to leverage GPU nodes, so we specify the `STANDARD_NC6` VM, which has one NVIDIA K80 GPU\n", + "* `vm_priority`: `'lowpriority'` or `'dedicated'`\n", + "* `autoscale_enabled`: with autoscaling set to `True`, Batch AI will automatically resize the cluster based on the demands of your workload. The default is `False`, which creates a cluster with a fixed number of nodes\n", + "* `cluster_min_nodes`: minimum number of VMs for autoscaling\n", + "* `cluster_max_nodes`: maximum number of VMs for autoscaling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Train TensorFlow MNIST\n", + "Now let's train a CNN on the MNIST dataset for predicting handwritten digits. The training script `tf_mnist_train.py` is adapted from TensorFlow's [MNIST](https://www.tensorflow.org/versions/r1.4/get_started/mnist/pros) tutorial. The only changes to the original concern logging some metrics about the training run to the AML run history. See the adapted file here: [tf_mnist_train.py](tf_mnist_train.py) -- search for 'run_logger' to find the added lines of code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from shutil import copyfile\n", + "\n", + "training_script = 'tf_mnist_train.py'\n", + "# copy the mnist_tf.py file to the project folder\n", + "copyfile(training_script, os.path.join(script_folder, training_script))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# take a look at the training script\n", + "!more $training_script" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### a. Run a single-node TensorFlow experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To facilitate ML training, the Python SDK provides a high-level abstraction called Estimators that allows users to train CNTK, TensorFlow, or custom scripts in the Azure ML ecosystem. 
Let's instantiate an AML TensorFlow Estimator (not to be conflated with the [`tf.estimator.Estimator`](#https://www.tensorflow.org/programmers_guide/estimators) class)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "\n", + "script_params = {\n", + " '--minibatch_size': 64,\n", + " '--learning_rate': 0.001,\n", + " '--keep_probability': 0.5,\n", + " '--output_dir': 'outputs',\n", + " '--num_iterations': 1000\n", + "}\n", + "\n", + "tf_estimator = TensorFlow(source_directory = script_folder, \n", + " script_params = script_params, \n", + " compute_target = compute_target, \n", + " entry_script = training_script, \n", + " node_count = 1,\n", + " use_gpu = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We specify the following parameters to the TensorFlow constructor:\n", + "* `script_params`: a dictionary specifying the command-line arguments to your `entry_script`\n", + "* `compute_target`: the compute target object. Can be a local, DSVM, or Batch AI compute target\n", + "* `entry_script`: the relative(?) path to the project directory of the file to be executed during training\n", + "* `node_count`: the number of nodes to use for the training job. Defaults to `1`\n", + "* `use_gpu`: to leverage the GPU for training, set this flag to `True`. Defaults to `False`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Note on `outputs` folder: **\n", + "\n", + "When running an experiment using the Python SDK, you can write files out to a folder named `outputs` that is relative to the root directory. This folder is specially tracked by AML in the sense that any files written to that folder during script execution will be picked up by Run History; these files (known as *artifacts*) will be available as part of the run history record." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "batchai", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "run = experiment.submit(tf_estimator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### b. Monitoring the training run\n", + "There are several ways with which the user can monitor the details and status of the training run. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Browse to the run history report (use Chrome please, for now)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print out the current run status" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also use a widget to monitor the progress of your submitted run, which allows you to do so without blocking your notebook execution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "use notebook widget" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![img](../images/hd_tf1.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "batchai", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "# to block and wait for training to complete \n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also check on the Batch AI cluster and job status using `az-cli` commands:\n", + "```shell\n", + "# check cluster status. You can see how many nodes are running.\n", + "$ az batchai cluster list\n", + "\n", + "# check job status. 
You can see how many jobs are running\n", + "$ az batchai job list\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### c. Log metrics to Run History\n", + "\n", + "Another useful feature of the Python SDK is the ability to log metrics for each run. These metrics are persisted in the run history by AML. In addition, they are automatically displayed and visualized by the RunDetails widget. (Logging run metrics is also required in order to use the HyperDrive service, which we will go over in more detail in section 4.)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The code snippet below from `tf_mnist_train.py` shows how we can log the script parameters for a training run, by specifying a key for the metric and the corresponding value:\n", + "```python\n", + "from azureml.core.run import Run\n", + "\n", + "run_logger = Run.get_submitted_run()\n", + "run_logger.log(\"learning_rate\", args.learning_rate)\n", + "run_logger.log(\"minibatch_size\", args.minibatch_size)\n", + "run_logger.log(\"keep_probability\", args.keep_probability)\n", + "run_logger.log(\"num_iterations\", args.num_iterations)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "get metrics" + ] + }, + "outputs": [], + "source": [ + "run.get_metrics()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Hyperparameter Tuning with HyperDrive\n", + "\n", + "Now that we've seen how to do a simple TensorFlow training run using the Python SDK, let's see if we can further improve the accuracy of our model.\n", + "\n", + "Hyperparameter tuning is a key part of machine learning experimentation, in which the data scientist tries different configurations of hyperparameters in order to find a set of values that optimizes a specific target metric, such as the accuracy of the model. 
To this end, Azure ML provides the **HyperDrive service** to facilitate the hyperparameter tuning process. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### a. Start a HyperDrive run\n", + "\n", + "Using HyperDrive, we specify the hyperparameter space to sweep over, the primary metric to optimize, and an early termination policy. HyperDrive will kick off multiple child runs with different hyperparameter configurations, and terminate underperforming runs according to the early termination policy provided." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive import *\n", + "\n", + "param_sampling = RandomParameterSampling( {\n", + " \"learning_rate\": loguniform(-10, -3),\n", + " \"keep_probability\": uniform(0.5, 0.1)\n", + " }\n", + ")\n", + "\n", + "early_termination_policy = BanditPolicy(slack_factor = 0.15, evaluation_interval=2)\n", + "\n", + "hyperdrive_run_config = HyperDriveRunConfig(estimator = tf_estimator, \n", + " hyperparameter_sampling = param_sampling, \n", + " policy = early_termination_policy,\n", + " primary_metric_name = \"Accuracy\",\n", + " primary_metric_goal = PrimaryMetricGoal.MAXIMIZE,\n", + " max_total_runs = 20,\n", + " max_concurrent_runs = 4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above cell, we first define a sampling space for the hyperparameters we want to sweep over, specifically the `learning_rate` and `keep_probability`. In this case we are using `RandomParameterSampling`, which allows us to specify the parameter values as either a choice among discrete values or as a distribution over a continuous range (here, we are using a uniform distribution for the `keep_probability`). 
You can run `help(RandomParameterSampling)` for more API details on this class.\n", + "\n", + "Then, we specify the early termination policy to use. If not specified, the policy defaults (?) to `None`, in which case all training runs are run to completion. Here we use the `BanditPolicy`, which will terminate any run that doesn't fall within the slack factor of our primary evaluation metric. Run `help(BanditPolicy)` for more details on this policy.\n", + "\n", + "To do: explain `evaluation_interval` within context of our training script.\n", + "\n", + "We specify the following parameters to the `HyperDriveRunConfig` constructor:\n", + "* explain input_paths?\n", + "* `estimator`: the estimator that will be called with the sampled hyperparameters\n", + "* `hyperparameter_sampling`: the sampling space to use\n", + "* `policy`: the early termination policy\n", + "* `primary_metric_name`: the name of the metric logged to the AML Run that HyperDrive will use to evaluate runs. Here, we are using the test accuracy (logged as 'Accuracy' in our training script)\n", + "* `primary_metric_goal`: the optimization goal of the primary metric (either `PrimaryMetricGoal.MAXIMIZE` or `PrimaryMetricGoal.MINIMIZE`)\n", + "* `max_total_runs`: the maximum number of runs HyperDrive will kick off\n", + "* `max_concurrent_runs`: the maximum number of runs to run concurrently\n", + "* `compute_target`: the compute target. In our case, the Batch AI cluster we provisioned" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "** Note on logging metrics for HyperDrive: ** \n", + "\n", + "In order to use HyperDrive, we will need to log the metric we want the service to use for evaluating run performance (`primary_metric_name`). In our script, we will use the accuracy of the model evaluated on the MNIST test dataset as our primary metric. For every 100 training iterations, we calculate and log this test accuracy (`'Accuracy'`). 
We also log an additional utility metric, `'Iterations'`, to inform us of the number of iterations the model was trained on that corresponds to each Accuracy metric logged (see `tf_mnist_train.py` for more details). This is useful for seeing how many iterations were trained for jobs that were terminated early.\n", + "\n", + "```python\n", + "run_logger.log(\"Accuracy\", float(test_acc))\n", + "run_logger.log(\"Iterations\", i)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hyperdrive run", + "batchai", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "# start the HyperDrive run\n", + "hyperdrive_run = experiment.submit(hyperdrive_run_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### b. Use a widget to visualize details of the HyperDrive runs\n", + "\n", + "Runs will automatically start to show in the following widget once rendered." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "use notebook widget" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "\n", + "RunDetails(hyperdrive_run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![img](../images/hd_tf2.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# check cluster status, pay attention to the # of running nodes\n", + "# !az batchai cluster list -o table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# check the Batch AI job queue. Notice the Job name is the run history ID. 
\n", + "# Pay attention to the state of the job.\n", + "# !az batchai job list -o table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### c. Find the best run\n", + "\n", + "Once all of the HyperDrive runs have completed, we can find the run that achieved the highest accuracy and its corresponding hyperparameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history", + "get metrics" + ] + }, + "outputs": [], + "source": [ + "table = helpers.ListTable()\n", + "from tqdm import tqdm\n", + "\n", + "run_metrics = {}\n", + "table.append(['Accuracy', 'Run', 'Iterations', 'learning_rate', 'keep_probability'])\n", + "for run in tqdm(hyperdrive_run.get_children()):\n", + " metrics = run.get_metrics()\n", + " if 'Accuracy' in metrics.keys():\n", + " metrics['Accuracy'] = metrics['Accuracy'][-1] # final test accuracy\n", + " metrics['Iterations'] = max(metrics['Iterations']) # number of iterations the run ran for\n", + " \n", + " table.append([metrics['Accuracy'], \n", + " run.id, \n", + " metrics['Iterations'], \n", + " metrics['learning_rate'], \n", + " metrics['keep_probability']])\n", + " run_metrics[run.id] = metrics\n", + "table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.run import Run\n", + "\n", + "best_run_id = max(run_metrics, key = lambda k: run_metrics[k]['Accuracy'])\n", + "best_run_metrics = run_metrics[best_run_id]\n", + "experiment = Experiment(ws, experiment_name)\n", + "best_run = Run(experiment, best_run_id)\n", + "\n", + "print('Best Run is:\\n Accuracy: {0:.6f} \\n Learning rate: {1:.6f} \\n Keep probability: {2}'.format(\n", + " best_run_metrics['Accuracy'],\n", + " 
best_run_metrics['learning_rate'],\n", + " best_run_metrics['keep_probability']\n", + " ))\n", + "\n", + "print(helpers.get_run_history_url(best_run))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot the runs [Optional] \n", + "Note you will need to install `matplotlib` for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plot_data = np.array([[run_metrics[i]['keep_probability'], \n", + " run_metrics[i]['learning_rate'], \n", + " run_metrics[i]['Accuracy']] for i in run_metrics.keys()])\n", + "area = np.array([[run_metrics[i]['Iterations']/5] for i in run_metrics.keys()])\n", + "\n", + "plt.figure(figsize = (15,5))\n", + "plt.scatter(plot_data[:,0], plot_data[:,1], s = area, c = plot_data[:,2], alpha = 0.4)\n", + "plt.xlabel(\"keep_probability\")\n", + "plt.ylabel(\"learning_rate\")\n", + "plt.yscale('log')\n", + "plt.ylim(0.00001,0.06)\n", + "plt.colorbar()\n", + "plt.clim(0.95, max(plot_data[:,2]))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### d. Download model from the best run\n", + "Once we've identified the best run from HyperDrive, we can download the model files to our local machine. \n", + "\n", + "The final trained model checkpoint files are located in the `outputs` directory picked up by AML. We can run the below line of code to confirm that those files are present:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "best_run.get_file_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can download the relevant checkpoint files. Note there is currently a bug on uploading files when executing on Batch AI cluster so the below code doesn't work yet." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "download file" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "output_dir = 'outputs'\n", + "target_dir = os.path.join('sample_projects', 'outputs')\n", + "model_files_to_download = ['checkpoint', 'model.ckpt.data-00000-of-00001', 'model.ckpt.index', 'model.ckpt.meta']\n", + "for file in model_files_to_download:\n", + " model_src_path = os.path.join(output_dir, file)\n", + " model_dest_path = os.path.join(target_dir, file)\n", + " print('downloading ' + file)\n", + " best_run.download_file(name = model_src_path, output_file_path = model_dest_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### e. Test the model locally\n", + "Now that we have downloaded the best-performing model, we can use it locally to score images of hand-written digits. For this we have prepared a scoring file [tf_mnist_score.py](tf_mnist_score.py) which we import below. tf_mnist_score.py provides a function `run(input_data)` which accepts a base64-encoded image in a JSON dict format (this format is friendly for the deployment of a webservice, which we will do later). 
\n", + "\n", + "Note that this scoring code requires tensorflow and PIL (`pip install tensorflow pillow`).\n", + "\n", + "First, we will create a base64-encoded image in a json structure based on one of the test images provided in the folder `mnist_test_images`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os, json, base64\n", + "from PIL import Image \n", + "import tf_mnist_score\n", + "from io import BytesIO\n", + "\n", + "def imgToBase64(img):\n", + " imgio = BytesIO()\n", + " img.save(imgio, 'JPEG')\n", + " img_str = base64.b64encode(imgio.getvalue())\n", + " return img_str.decode('utf-8')\n", + "\n", + "# Generate JSON Base64-encoded image from sample test input\n", + "test_img_path = os.path.join('mnist_test_images', 'img_3.jpg')\n", + "base64Img = imgToBase64(Image.open(test_img_path))\n", + "data = json.dumps({'data': base64Img})\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we will call `tf_mnist_score.run()` with the json data structure we created above. And we draw the image that we are scoring, so we can compare the label returned by the model with the actual handwritten digit." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image as IPImage\n", + "tf_mnist_score.init()\n", + "result = tf_mnist_score.run(data)\n", + "print(result)\n", + "IPImage(filename=test_img_path, width=200)" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "208cc3b53e2c45fea1440188a863efb8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_contrib_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowHyperDriveRunsView", + "layout": "IPY_MODEL_8a36279a14624bbdb1926c2572748861", + "value": { + "child_runs": [ + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000852395056049516 --keep_probability 0.434530370965995", + "created_time": "2018-06-01 19:41:39.666220+00:00", + "created_time_dt": "2018-06-01T19:41:39.666220", + "duration": "0:00:57", + "end_time": "2018-06-01 19:42:37.511351+00:00", + "hyperdrive_id": "5488", + "metric": 0.9842000007629395, + "paras_keep_probability": "0.434530370965995", + "paras_learning_rate": "0.000852395056049516", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": 
"tensorflow-hyperdrive_1527881081325_446_5488_abbecb6c", + "run_number": 16, + "start_time": "2018-06-01 19:41:40.368621+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00179999057463703 --keep_probability 0.296515321882523", + "created_time": "2018-06-01 19:29:46.303636+00:00", + "created_time_dt": "2018-06-01T19:29:46.303636", + "duration": "0:02:03", + "end_time": "2018-06-01 19:31:50.043486+00:00", + "hyperdrive_id": "5469", + "metric": 0.9836999773979187, + "paras_keep_probability": "0.296515321882523", + "paras_learning_rate": "0.00179999057463703", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5469_9f034e69", + "run_number": 10, + "start_time": "2018-06-01 19:29:47.033264+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000676904386677712 --keep_probability 0.4154535083569", + "created_time": "2018-06-01 19:50:31.651044+00:00", + "created_time_dt": "2018-06-01T19:50:31.651044", + "duration": "0:14:30", + "end_time": "2018-06-01 20:05:02.591649+00:00", + "hyperdrive_id": "5498", + "metric": 0.9828000068664551, + "paras_keep_probability": "0.4154535083569", + "paras_learning_rate": "0.000676904386677712", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5498_32e0a249", + "run_number": 20, + 
"start_time": "2018-06-01 19:50:37.386350+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000586938713321222 --keep_probability 0.432942295536284", + "created_time": "2018-06-01 19:37:36.678691+00:00", + "created_time_dt": "2018-06-01T19:37:36.678691", + "duration": "0:01:46", + "end_time": "2018-06-01 19:39:23.211000+00:00", + "hyperdrive_id": "5479", + "metric": 0.9818000197410583, + "paras_keep_probability": "0.432942295536284", + "paras_learning_rate": "0.000586938713321222", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5479_cb5037ed", + "run_number": 11, + "start_time": "2018-06-01 19:37:43.143211+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000321696353537414 --keep_probability 0.446837800410634", + "created_time": "2018-06-01 19:41:39.915872+00:00", + "created_time_dt": "2018-06-01T19:41:39.915872", + "duration": "0:02:58", + "end_time": "2018-06-01 19:44:38.693923+00:00", + "hyperdrive_id": "5490", + "metric": 0.9812999963760376, + "paras_keep_probability": "0.446837800410634", + "paras_learning_rate": "0.000321696353537414", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5490_cfcbcea1", + "run_number": 17, + "start_time": "2018-06-01 19:41:40.688804+00:00", + "status": "Completed" + 
}, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000598930751146987 --keep_probability 0.173175740602207", + "created_time": "2018-06-01 19:41:44.682554+00:00", + "created_time_dt": "2018-06-01T19:41:44.682554", + "duration": "0:01:54", + "end_time": "2018-06-01 19:43:38.690104+00:00", + "hyperdrive_id": "5491", + "metric": 0.9785000085830688, + "paras_keep_probability": "0.173175740602207", + "paras_learning_rate": "0.000598930751146987", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5491_1ab60563", + "run_number": 18, + "start_time": "2018-06-01 19:41:45.356160+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00313856224079023 --keep_probability 0.308708329651949", + "created_time": "2018-06-01 19:29:46.140940+00:00", + "created_time_dt": "2018-06-01T19:29:46.140940", + "duration": "0:02:02", + "end_time": "2018-06-01 19:31:49.127224+00:00", + "hyperdrive_id": "5471", + "metric": 0.9764000177383423, + "paras_keep_probability": "0.308708329651949", + "paras_learning_rate": "0.00313856224079023", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5471_05cdc17b", + "run_number": 8, + "start_time": "2018-06-01 19:29:46.876362+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i 
OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000321079619657115 --keep_probability 0.166071686996525", + "created_time": "2018-06-01 19:26:11.468523+00:00", + "created_time_dt": "2018-06-01T19:26:11.468523", + "duration": "0:01:48", + "end_time": "2018-06-01 19:28:00.170666+00:00", + "hyperdrive_id": "5460", + "metric": 0.9703999757766724, + "paras_keep_probability": "0.166071686996525", + "paras_learning_rate": "0.000321079619657115", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5460_0ff67ff3", + "run_number": 6, + "start_time": "2018-06-01 19:26:12.172473+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00970484525511844 --keep_probability 0.334371206847485", + "created_time": "2018-06-01 19:25:53.815492+00:00", + "created_time_dt": "2018-06-01T19:25:53.815492", + "duration": "0:02:07", + "end_time": "2018-06-01 19:28:01.507944+00:00", + "hyperdrive_id": "5457", + "metric": 0.968500018119812, + "paras_keep_probability": "0.334371206847485", + "paras_learning_rate": "0.00970484525511844", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5457_a4c3a147", + "run_number": 4, + "start_time": "2018-06-01 19:26:08.553859+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i 
DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00922621594789716 --keep_probability 0.227683838955561", + "created_time": "2018-06-01 19:37:36.835723+00:00", + "created_time_dt": "2018-06-01T19:37:36.835723", + "duration": "0:01:03", + "end_time": "2018-06-01 19:38:40.652773+00:00", + "hyperdrive_id": "5480", + "metric": 0.9663000106811523, + "paras_keep_probability": "0.227683838955561", + "paras_learning_rate": "0.00922621594789716", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5480_f234fb08", + "run_number": 14, + "start_time": "2018-06-01 19:37:38.116439+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0155645426732787 --keep_probability 0.159123698168668", + "created_time": "2018-06-01 19:29:46.277531+00:00", + "created_time_dt": "2018-06-01T19:29:46.277531", + "duration": "0:01:09", + "end_time": "2018-06-01 19:30:55.727701+00:00", + "hyperdrive_id": "5472", + "metric": 0.964900016784668, + "paras_keep_probability": "0.159123698168668", + "paras_learning_rate": "0.0155645426732787", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5472_23122c4b", + "run_number": 9, + "start_time": "2018-06-01 19:29:46.964148+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 
--learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00838536088211458 --keep_probability 0.102478957268164", + "created_time": "2018-06-01 19:25:53.548553+00:00", + "created_time_dt": "2018-06-01T19:25:53.548553", + "duration": "0:01:05", + "end_time": "2018-06-01 19:26:59.136632+00:00", + "hyperdrive_id": "5459", + "metric": 0.9646999835968018, + "paras_keep_probability": "0.102478957268164", + "paras_learning_rate": "0.00838536088211458", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5459_030491ad", + "run_number": 3, + "start_time": "2018-06-01 19:25:54.739654+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000143958552086584 --keep_probability 0.273084377226789", + "created_time": "2018-06-01 19:29:46.057879+00:00", + "created_time_dt": "2018-06-01T19:29:46.057879", + "duration": "0:01:18", + "end_time": "2018-06-01 19:31:04.843202+00:00", + "hyperdrive_id": "5470", + "metric": 0.963699996471405, + "paras_keep_probability": "0.273084377226789", + "paras_learning_rate": "0.000143958552086584", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5470_a648dbea", + "run_number": 7, + "start_time": "2018-06-01 19:29:46.780201+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs 
--num_iterations 1000 --learning_rate 7.14051833348127E-05 --keep_probability 0.472685817381368", + "created_time": "2018-06-01 19:41:39.648602+00:00", + "created_time_dt": "2018-06-01T19:41:39.648602", + "duration": "0:03:06", + "end_time": "2018-06-01 19:44:45.699811+00:00", + "hyperdrive_id": "5489", + "metric": 0.9613000154495239, + "paras_keep_probability": "0.472685817381368", + "paras_learning_rate": "7.14051833348127E-05", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5489_38907948", + "run_number": 15, + "start_time": "2018-06-01 19:41:40.512369+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00737747352627753 --keep_probability 0.205239625544216", + "created_time": "2018-06-01 19:50:33.596963+00:00", + "created_time_dt": "2018-06-01T19:50:33.596963", + "duration": "0:01:51", + "end_time": "2018-06-01 19:52:25.281499+00:00", + "hyperdrive_id": "5497", + "metric": 0.9581999778747559, + "paras_keep_probability": "0.205239625544216", + "paras_learning_rate": "0.00737747352627753", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5497_8130025b", + "run_number": 22, + "start_time": "2018-06-01 19:50:34.456850+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0211316024512922 --keep_probability 
0.456008246140918", + "created_time": "2018-06-01 19:50:31.581841+00:00", + "created_time_dt": "2018-06-01T19:50:31.581841", + "duration": "0:01:03", + "end_time": "2018-06-01 19:51:35.272415+00:00", + "hyperdrive_id": "5499", + "metric": 0.9580000042915344, + "paras_keep_probability": "0.456008246140918", + "paras_learning_rate": "0.0211316024512922", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5499_e0b5a73f", + "run_number": 19, + "start_time": "2018-06-01 19:50:32.786951+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 7.56451710371043E-05 --keep_probability 0.321364540919092", + "created_time": "2018-06-01 19:50:33.421674+00:00", + "created_time_dt": "2018-06-01T19:50:33.421674", + "duration": "0:06:27", + "end_time": "2018-06-01 19:57:00.982688+00:00", + "hyperdrive_id": "5496", + "metric": 0.9520000219345093, + "paras_keep_probability": "0.321364540919092", + "paras_learning_rate": "7.56451710371043E-05", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5496_46a98c1f", + "run_number": 21, + "start_time": "2018-06-01 19:50:34.379782+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 6.86923046964849E-05 --keep_probability 0.229123758955098", + "created_time": "2018-06-01 19:37:36.816510+00:00", + 
"created_time_dt": "2018-06-01T19:37:36.816510", + "duration": "0:01:12", + "end_time": "2018-06-01 19:38:49.439465+00:00", + "hyperdrive_id": "5477", + "metric": 0.9483000040054321, + "paras_keep_probability": "0.229123758955098", + "paras_learning_rate": "6.86923046964849E-05", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5477_c428bcf0", + "run_number": 13, + "start_time": "2018-06-01 19:37:42.971387+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.014609502490554 --keep_probability 0.480459935106515", + "created_time": "2018-06-01 19:26:10.258955+00:00", + "created_time_dt": "2018-06-01T19:26:10.258955", + "duration": "0:02:41", + "end_time": "2018-06-01 19:28:52.069673+00:00", + "hyperdrive_id": "5458", + "metric": 0.12110000103712082, + "paras_keep_probability": "0.480459935106515", + "paras_learning_rate": "0.014609502490554", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5458_3f73f0ac", + "run_number": 5, + "start_time": "2018-06-01 19:26:17.107379+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0149932664638274 --keep_probability 0.284424630578217", + "created_time": "2018-06-01 19:37:36.730460+00:00", + "created_time_dt": "2018-06-01T19:37:36.730460", + "duration": "0:01:08", + 
"end_time": "2018-06-01 19:38:44.881339+00:00", + "hyperdrive_id": "5478", + "metric": 0.11349999904632568, + "paras_keep_probability": "0.284424630578217", + "paras_learning_rate": "0.0149932664638274", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5478_24390740", + "run_number": 12, + "start_time": "2018-06-01 19:37:42.865594+00:00", + "status": "Completed" + } + ], + "children_metrics": { + "allArguments": [ + "keep_probability", + "learning_rate", + "minibatch_size", + "output_dir", + "num_iterations", + "Accuracy" + ], + "categories": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "jobHistData": [ + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000852395056049516 --keep_probability 0.434530370965995", + "created_time": "2018-06-01 19:41:39.666220+00:00", + "created_time_dt": "2018-06-01T19:41:39.666220", + "duration": "0:00:57", + "end_time": "2018-06-01 19:42:37.511351+00:00", + "hyperdrive_id": "5488", + "metric": 0.9842000007629395, + "paras_keep_probability": "0.434530370965995", + "paras_learning_rate": "0.000852395056049516", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5488_abbecb6c", + "run_number": 16, + "start_time": "2018-06-01 19:41:40.368621+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 
0.00179999057463703 --keep_probability 0.296515321882523", + "created_time": "2018-06-01 19:29:46.303636+00:00", + "created_time_dt": "2018-06-01T19:29:46.303636", + "duration": "0:02:03", + "end_time": "2018-06-01 19:31:50.043486+00:00", + "hyperdrive_id": "5469", + "metric": 0.9836999773979187, + "paras_keep_probability": "0.296515321882523", + "paras_learning_rate": "0.00179999057463703", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5469_9f034e69", + "run_number": 10, + "start_time": "2018-06-01 19:29:47.033264+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000676904386677712 --keep_probability 0.4154535083569", + "created_time": "2018-06-01 19:50:31.651044+00:00", + "created_time_dt": "2018-06-01T19:50:31.651044", + "duration": "0:14:30", + "end_time": "2018-06-01 20:05:02.591649+00:00", + "hyperdrive_id": "5498", + "metric": 0.9828000068664551, + "paras_keep_probability": "0.4154535083569", + "paras_learning_rate": "0.000676904386677712", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5498_32e0a249", + "run_number": 20, + "start_time": "2018-06-01 19:50:37.386350+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000586938713321222 --keep_probability 0.432942295536284", + "created_time": 
"2018-06-01 19:37:36.678691+00:00", + "created_time_dt": "2018-06-01T19:37:36.678691", + "duration": "0:01:46", + "end_time": "2018-06-01 19:39:23.211000+00:00", + "hyperdrive_id": "5479", + "metric": 0.9818000197410583, + "paras_keep_probability": "0.432942295536284", + "paras_learning_rate": "0.000586938713321222", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5479_cb5037ed", + "run_number": 11, + "start_time": "2018-06-01 19:37:43.143211+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000321696353537414 --keep_probability 0.446837800410634", + "created_time": "2018-06-01 19:41:39.915872+00:00", + "created_time_dt": "2018-06-01T19:41:39.915872", + "duration": "0:02:58", + "end_time": "2018-06-01 19:44:38.693923+00:00", + "hyperdrive_id": "5490", + "metric": 0.9812999963760376, + "paras_keep_probability": "0.446837800410634", + "paras_learning_rate": "0.000321696353537414", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5490_cfcbcea1", + "run_number": 17, + "start_time": "2018-06-01 19:41:40.688804+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000598930751146987 --keep_probability 0.173175740602207", + "created_time": "2018-06-01 19:41:44.682554+00:00", + "created_time_dt": 
"2018-06-01T19:41:44.682554", + "duration": "0:01:54", + "end_time": "2018-06-01 19:43:38.690104+00:00", + "hyperdrive_id": "5491", + "metric": 0.9785000085830688, + "paras_keep_probability": "0.173175740602207", + "paras_learning_rate": "0.000598930751146987", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5491_1ab60563", + "run_number": 18, + "start_time": "2018-06-01 19:41:45.356160+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00313856224079023 --keep_probability 0.308708329651949", + "created_time": "2018-06-01 19:29:46.140940+00:00", + "created_time_dt": "2018-06-01T19:29:46.140940", + "duration": "0:02:02", + "end_time": "2018-06-01 19:31:49.127224+00:00", + "hyperdrive_id": "5471", + "metric": 0.9764000177383423, + "paras_keep_probability": "0.308708329651949", + "paras_learning_rate": "0.00313856224079023", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5471_05cdc17b", + "run_number": 8, + "start_time": "2018-06-01 19:29:46.876362+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000321079619657115 --keep_probability 0.166071686996525", + "created_time": "2018-06-01 19:26:11.468523+00:00", + "created_time_dt": "2018-06-01T19:26:11.468523", + "duration": "0:01:48", + "end_time": 
"2018-06-01 19:28:00.170666+00:00", + "hyperdrive_id": "5460", + "metric": 0.9703999757766724, + "paras_keep_probability": "0.166071686996525", + "paras_learning_rate": "0.000321079619657115", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5460_0ff67ff3", + "run_number": 6, + "start_time": "2018-06-01 19:26:12.172473+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00970484525511844 --keep_probability 0.334371206847485", + "created_time": "2018-06-01 19:25:53.815492+00:00", + "created_time_dt": "2018-06-01T19:25:53.815492", + "duration": "0:02:07", + "end_time": "2018-06-01 19:28:01.507944+00:00", + "hyperdrive_id": "5457", + "metric": 0.968500018119812, + "paras_keep_probability": "0.334371206847485", + "paras_learning_rate": "0.00970484525511844", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5457_a4c3a147", + "run_number": 4, + "start_time": "2018-06-01 19:26:08.553859+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00922621594789716 --keep_probability 0.227683838955561", + "created_time": "2018-06-01 19:37:36.835723+00:00", + "created_time_dt": "2018-06-01T19:37:36.835723", + "duration": "0:01:03", + "end_time": "2018-06-01 19:38:40.652773+00:00", + "hyperdrive_id": "5480", + "metric": 
0.9663000106811523, + "paras_keep_probability": "0.227683838955561", + "paras_learning_rate": "0.00922621594789716", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5480_f234fb08", + "run_number": 14, + "start_time": "2018-06-01 19:37:38.116439+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0155645426732787 --keep_probability 0.159123698168668", + "created_time": "2018-06-01 19:29:46.277531+00:00", + "created_time_dt": "2018-06-01T19:29:46.277531", + "duration": "0:01:09", + "end_time": "2018-06-01 19:30:55.727701+00:00", + "hyperdrive_id": "5472", + "metric": 0.964900016784668, + "paras_keep_probability": "0.159123698168668", + "paras_learning_rate": "0.0155645426732787", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5472_23122c4b", + "run_number": 9, + "start_time": "2018-06-01 19:29:46.964148+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00838536088211458 --keep_probability 0.102478957268164", + "created_time": "2018-06-01 19:25:53.548553+00:00", + "created_time_dt": "2018-06-01T19:25:53.548553", + "duration": "0:01:05", + "end_time": "2018-06-01 19:26:59.136632+00:00", + "hyperdrive_id": "5459", + "metric": 0.9646999835968018, + "paras_keep_probability": "0.102478957268164", + 
"paras_learning_rate": "0.00838536088211458", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5459_030491ad", + "run_number": 3, + "start_time": "2018-06-01 19:25:54.739654+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000143958552086584 --keep_probability 0.273084377226789", + "created_time": "2018-06-01 19:29:46.057879+00:00", + "created_time_dt": "2018-06-01T19:29:46.057879", + "duration": "0:01:18", + "end_time": "2018-06-01 19:31:04.843202+00:00", + "hyperdrive_id": "5470", + "metric": 0.963699996471405, + "paras_keep_probability": "0.273084377226789", + "paras_learning_rate": "0.000143958552086584", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5470_a648dbea", + "run_number": 7, + "start_time": "2018-06-01 19:29:46.780201+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 7.14051833348127E-05 --keep_probability 0.472685817381368", + "created_time": "2018-06-01 19:41:39.648602+00:00", + "created_time_dt": "2018-06-01T19:41:39.648602", + "duration": "0:03:06", + "end_time": "2018-06-01 19:44:45.699811+00:00", + "hyperdrive_id": "5489", + "metric": 0.9613000154495239, + "paras_keep_probability": "0.472685817381368", + "paras_learning_rate": "7.14051833348127E-05", + "paras_minibatch_size": "64", 
+ "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5489_38907948", + "run_number": 15, + "start_time": "2018-06-01 19:41:40.512369+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00737747352627753 --keep_probability 0.205239625544216", + "created_time": "2018-06-01 19:50:33.596963+00:00", + "created_time_dt": "2018-06-01T19:50:33.596963", + "duration": "0:01:51", + "end_time": "2018-06-01 19:52:25.281499+00:00", + "hyperdrive_id": "5497", + "metric": 0.9581999778747559, + "paras_keep_probability": "0.205239625544216", + "paras_learning_rate": "0.00737747352627753", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5497_8130025b", + "run_number": 22, + "start_time": "2018-06-01 19:50:34.456850+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0211316024512922 --keep_probability 0.456008246140918", + "created_time": "2018-06-01 19:50:31.581841+00:00", + "created_time_dt": "2018-06-01T19:50:31.581841", + "duration": "0:01:03", + "end_time": "2018-06-01 19:51:35.272415+00:00", + "hyperdrive_id": "5499", + "metric": 0.9580000042915344, + "paras_keep_probability": "0.456008246140918", + "paras_learning_rate": "0.0211316024512922", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": 
"tensorflow-hyperdrive_1527881081325_446_5499_e0b5a73f", + "run_number": 19, + "start_time": "2018-06-01 19:50:32.786951+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 7.56451710371043E-05 --keep_probability 0.321364540919092", + "created_time": "2018-06-01 19:50:33.421674+00:00", + "created_time_dt": "2018-06-01T19:50:33.421674", + "duration": "0:06:27", + "end_time": "2018-06-01 19:57:00.982688+00:00", + "hyperdrive_id": "5496", + "metric": 0.9520000219345093, + "paras_keep_probability": "0.321364540919092", + "paras_learning_rate": "7.56451710371043E-05", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5496_46a98c1f", + "run_number": 21, + "start_time": "2018-06-01 19:50:34.379782+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 6.86923046964849E-05 --keep_probability 0.229123758955098", + "created_time": "2018-06-01 19:37:36.816510+00:00", + "created_time_dt": "2018-06-01T19:37:36.816510", + "duration": "0:01:12", + "end_time": "2018-06-01 19:38:49.439465+00:00", + "hyperdrive_id": "5477", + "metric": 0.9483000040054321, + "paras_keep_probability": "0.229123758955098", + "paras_learning_rate": "6.86923046964849E-05", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5477_c428bcf0", + "run_number": 
13, + "start_time": "2018-06-01 19:37:42.971387+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.014609502490554 --keep_probability 0.480459935106515", + "created_time": "2018-06-01 19:26:10.258955+00:00", + "created_time_dt": "2018-06-01T19:26:10.258955", + "duration": "0:02:41", + "end_time": "2018-06-01 19:28:52.069673+00:00", + "hyperdrive_id": "5458", + "metric": 0.12110000103712082, + "paras_keep_probability": "0.480459935106515", + "paras_learning_rate": "0.014609502490554", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5458_3f73f0ac", + "run_number": 5, + "start_time": "2018-06-01 19:26:17.107379+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0149932664638274 --keep_probability 0.284424630578217", + "created_time": "2018-06-01 19:37:36.730460+00:00", + "created_time_dt": "2018-06-01T19:37:36.730460", + "duration": "0:01:08", + "end_time": "2018-06-01 19:38:44.881339+00:00", + "hyperdrive_id": "5478", + "metric": 0.11349999904632568, + "paras_keep_probability": "0.284424630578217", + "paras_learning_rate": "0.0149932664638274", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5478_24390740", + "run_number": 12, + "start_time": "2018-06-01 19:37:42.865594+00:00", + "status": "Completed" + } 
+ ], + "metricName": "Accuracy", + "series": [ + { + "mode": "lines", + "name": 20, + "run_id": 20, + "stepped": false, + "uid": "d9463a", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.052000001072883606, + 0.9318000078201294, + 0.9584000110626221, + 0.9639999866485596, + 0.9710999727249146, + 0.9746999740600586, + 0.9768999814987183, + 0.9822999835014343, + 0.978600025177002, + 0.9801999926567078, + 0.9828000068664551 + ] + }, + { + "mode": "lines", + "name": 7, + "run_id": 7, + "stepped": false, + "uid": "2d6574", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.08749999850988388, + 0.855400025844574, + 0.911300003528595, + 0.9289000034332275, + 0.9394999742507935, + 0.9431999921798706, + 0.9509999752044678, + 0.9553999900817871, + 0.9555000066757202, + 0.963699996471405, + 0.9616000056266785 + ] + }, + { + "mode": "lines", + "name": 16, + "run_id": 16, + "stepped": false, + "uid": "add68c", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.08869999647140503, + 0.9404000043869019, + 0.9574000239372253, + 0.9700999855995178, + 0.9715999960899353, + 0.979200005531311, + 0.9797000288963318, + 0.9812999963760376, + 0.9797999858856201, + 0.9817000031471252, + 0.9842000007629395 + ] + }, + { + "mode": "lines", + "name": 14, + "run_id": 14, + "stepped": false, + "uid": "38792f", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.08950000256299973, + 0.9248999953269958, + 0.9545999765396118, + 0.9581000208854675, + 0.9559999704360962, + 0.9627000093460083, + 0.9642000198364258, + 0.9663000106811523, + 0.9585999846458435, + 0.963100016117096, + 0.9595999717712402 + ] + }, + { + "mode": "lines", + "name": 6, + "run_id": 6, + "stepped": false, + "uid": "10fcf1", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.08980000019073486, + 0.8791999816894531, + 0.9261000156402588, + 0.9426000118255615, + 
0.9501000046730042, + 0.9546999931335449, + 0.9573000073432922, + 0.963699996471405, + 0.965499997138977, + 0.9684000015258789, + 0.9703999757766724 + ] + }, + { + "mode": "lines", + "name": 22, + "run_id": 22, + "stepped": false, + "uid": "f09911", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.0949999988079071, + 0.8639000058174133, + 0.9139999747276306, + 0.9171000123023987, + 0.930899977684021, + 0.9441999793052673, + 0.9545999765396118, + 0.9560999870300293, + 0.9581999778747559, + 0.9539999961853027, + 0.9559000134468079 + ] + }, + { + "mode": "lines", + "name": 8, + "run_id": 8, + "stepped": false, + "uid": "e6ae29", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.0949999988079071, + 0.8953999876976013, + 0.942300021648407, + 0.9513999819755554, + 0.9617000222206116, + 0.9564999938011169, + 0.9689000248908997, + 0.9688000082969666, + 0.9704999923706055, + 0.9760000109672546, + 0.9764000177383423 + ] + }, + { + "mode": "lines", + "name": 12, + "run_id": 12, + "stepped": false, + "uid": "84d316", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.09629999846220016, + 0.11349999904632568, + 0.10279999673366547, + 0.10090000182390213, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.10279999673366547, + 0.10279999673366547, + 0.11349999904632568, + 0.11349999904632568 + ] + }, + { + "mode": "lines", + "name": 5, + "run_id": 5, + "stepped": false, + "uid": "05acc0", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.12110000103712082, + 0.0982000008225441, + 0.11349999904632568, + 0.11349999904632568, + 0.10320000350475311, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568 + ] + }, + { + "mode": "lines", + "name": 4, + 
"run_id": 4, + "stepped": false, + "uid": "9b5194", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.10109999775886536, + 0.8985999822616577, + 0.9424999952316284, + 0.9294999837875366, + 0.9545999765396118, + 0.9581000208854675, + 0.9616000056266785, + 0.9678999781608582, + 0.9661999940872192, + 0.9672999978065491, + 0.968500018119812 + ] + }, + { + "mode": "lines", + "name": 11, + "run_id": 11, + "stepped": false, + "uid": "b7a136", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.10440000146627426, + 0.9383000135421753, + 0.9538999795913696, + 0.9664000272750854, + 0.9717000126838684, + 0.9760000109672546, + 0.9732999801635742, + 0.9779000282287598, + 0.9817000031471252, + 0.9818000197410583, + 0.9799000024795532 + ] + }, + { + "mode": "lines", + "name": 3, + "run_id": 3, + "stepped": false, + "uid": "dab69b", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.11159999668598175, + 0.8949000239372253, + 0.9286999702453613, + 0.9498999714851379, + 0.9539999961853027, + 0.953499972820282, + 0.9606999754905701, + 0.9613000154495239, + 0.9549999833106995, + 0.9646999835968018, + 0.9581999778747559 + ] + }, + { + "mode": "lines", + "name": 15, + "run_id": 15, + "stepped": false, + "uid": "90901a", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.11159999668598175, + 0.8270000219345093, + 0.9024999737739563, + 0.9175999760627747, + 0.9323999881744385, + 0.9368000030517578, + 0.9431999921798706, + 0.9506000280380249, + 0.9526000022888184, + 0.9584000110626221, + 0.9613000154495239 + ] + }, + { + "mode": "lines", + "name": 10, + "run_id": 10, + "stepped": false, + "uid": "bd666d", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.11209999769926071, + 0.9466000199317932, + 
0.9639000296592712, + 0.9722999930381775, + 0.9781000018119812, + 0.9800999760627747, + 0.9781000018119812, + 0.9814000129699707, + 0.9833999872207642, + 0.9836999773979187, + 0.9829000234603882 + ] + }, + { + "mode": "lines", + "name": 21, + "run_id": 21, + "stepped": false, + "uid": "3f3472", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.11500000208616257, + 0.789900004863739, + 0.8838000297546387, + 0.9072999954223633, + 0.9203000068664551, + 0.9312000274658203, + 0.9319000244140625, + 0.9434000253677368, + 0.9470999836921692, + 0.9477999806404114, + 0.9520000219345093 + ] + }, + { + "mode": "lines", + "name": 19, + "run_id": 19, + "stepped": false, + "uid": "982581", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.1160999983549118, + 0.9366000294685364, + 0.9473000168800354, + 0.9552000164985657, + 0.95660001039505, + 0.9465000033378601, + 0.9575999975204468, + 0.9580000042915344, + 0.949999988079071, + 0.9520999789237976, + 0.9567999839782715 + ] + }, + { + "mode": "lines", + "name": 18, + "run_id": 18, + "stepped": false, + "uid": "f812c0", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.12210000306367874, + 0.917900025844574, + 0.9437999725341797, + 0.95660001039505, + 0.9646000266075134, + 0.9692000150680542, + 0.9707000255584717, + 0.9735999703407288, + 0.9781000018119812, + 0.9775999784469604, + 0.9785000085830688 + ] + }, + { + "mode": "lines", + "name": 13, + "run_id": 13, + "stepped": false, + "uid": "022e43", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.13899999856948853, + 0.7547000050544739, + 0.8593000173568726, + 0.8981999754905701, + 0.9186999797821045, + 0.9254999756813049, + 0.934499979019165, + 0.9377999901771545, + 0.9431999921798706, + 0.9437999725341797, + 0.9483000040054321 + ] + }, + { + 
"mode": "lines", + "name": 9, + "run_id": 9, + "stepped": false, + "uid": "ee6d23", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.14800000190734863, + 0.9146999716758728, + 0.9452999830245972, + 0.9506000280380249, + 0.9550999999046326, + 0.9584000110626221, + 0.9599000215530396, + 0.9621000289916992, + 0.964900016784668, + 0.9510999917984009, + 0.9624000191688538 + ] + }, + { + "mode": "lines", + "name": 17, + "run_id": 17, + "stepped": false, + "uid": "4fd9d8", + "visible": "legendonly", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.16419999301433563, + 0.9247999787330627, + 0.9431999921798706, + 0.9599000215530396, + 0.9674999713897705, + 0.9695000052452087, + 0.9765999913215637, + 0.9763000011444092, + 0.9779999852180481, + 0.9812999963760376, + 0.9789000153541565 + ] + } + ], + "showLegend": true, + "title": "HyperDrive Run Primary Metric : Accuracy" + }, + "run_id": "tensorflow-hyperdrive_1527881081325", + "run_logs": "", + "run_metrics": [], + "run_properties": { + "additional_properties": {}, + "created_utc": "2018-06-01T19:24:41.846775", + "description": null, + "end_time_utc": "2018-06-01T20:05:15.398835", + "experiment_id": "54fc7a8b-21a4-4a10-8931-bd36c717c9b7", + "heartbeat_enabled": false, + "hidden": false, + "name": "tensorflow-hyperdrive", + "parent_run_id": null, + "properties": { + "all_jobs_generated": "true", + "azureml.runsource": "hyperdrive", + "cancellation_requested": "false", + "generator_config": "{\"name\": \"RANDOM\", \"parameter_space\": {\"learning_rate\": [\"loguniform\", [-10, -3]], \"keep_probability\": [\"uniform\", [0.5, 0.1]]}}", + "is_hyperdrive_run": "true", + "max_concurrent_jobs": "4", + "max_duration_minutes": "43200", + "max_total_jobs": "20", + "policy_config": "{\"name\": \"BANDIT\", \"properties\": {\"slack_factor\": 0.15, \"evaluation_interval\": 2, \"delay_evaluation\": 0}}", + "primary_metric_config": "{\"name\": 
\"Accuracy\", \"goal\": \"maximize\"}", + "runTemplate": "HyperDrive" + }, + "root_run_id": "tensorflow-hyperdrive_1527881081325", + "run_id": "tensorflow-hyperdrive_1527881081325", + "run_number": 2, + "script_name": "tf_mnist_train.py", + "start_time_utc": null, + "status": "Completed", + "tags": {}, + "target": "gpucluster", + "token": null, + "token_expiry_time_utc": null, + "user_id": "fffc1c66-275f-4935-bb04-70a760c82fda" + }, + "status": "Completed", + "workbench_run_details_uri": "https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-notebooks%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2ws3/projects/tensorflow-hyperdrive/run-history/run-details/tensorflow-hyperdrive_1527881081325?type=HyperDrive" + } + } + }, + "49be30037a73481a900026b30fc816eb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "524fd810f76f4ed69236c8e0cf20da11": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_contrib_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowRunDetailsView", + "layout": "IPY_MODEL_cce84fe5e64141a0a69e0029ca3553a5", + "value": { + "child_runs": [], + "children_metrics": {}, + "run_id": "tensorflow-hyperdrive_1527879977658", + "run_logs": "Uploading experiment status to history service.\nAdding run profile attachment azureml-logs/80_driver_log.txt\nUploading experiment status to history service.\nAdding run profile attachment azureml-logs/60_control_log.txt\n\n\rUsing Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\nSetting default log level to \"WARN\".\nTo adjust logging level use sc.setLogLevel(newLevel). 
For SparkR, use setLogLevel(newLevel).\n18/06/01 19:20:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n/azureml-envs/azureml_799f97443dc957270fd0268d825cda62/lib/python3.6/importlib/_bootstrap.py:205: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6\n return f(*args, **kwds)\nSuccessfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.\nExtracting MNIST_data/train-images-idx3-ubyte.gz\nSuccessfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\nExtracting MNIST_data/train-labels-idx1-ubyte.gz\nSuccessfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.\nExtracting MNIST_data/t10k-images-idx3-ubyte.gz\nSuccessfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\nExtracting MNIST_data/t10k-labels-idx1-ubyte.gz\n2018-06-01 19:20:33.459019: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA\n2018-06-01 19:20:33.674933: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 0 with properties: \nname: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\npciBusID: 813c:00:00.0\ntotalMemory: 11.17GiB freeMemory: 11.09GiB\n2018-06-01 19:20:33.674977: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: Tesla K80, pci bus id: 813c:00:00.0, compute capability: 3.7)\nstep 0, training accuracy 0.09375, test accuracy, 0.0958\nstep 100, training accuracy 0.953125, test accuracy, 0.9461\nstep 200, training accuracy 0.953125, test accuracy, 0.9594\nstep 300, training accuracy 0.953125, test accuracy, 0.9719\nstep 400, training accuracy 0.984375, test accuracy, 0.9787\nstep 500, training accuracy 0.96875, test accuracy, 0.9778\nstep 600, training accuracy 0.984375, test accuracy, 0.9806\nstep 700, training 
accuracy 0.96875, test accuracy, 0.9788\nstep 800, training accuracy 0.96875, test accuracy, 0.9819\nstep 900, training accuracy 0.984375, test accuracy, 0.9818\ntest accuracy 0.9836\nThe experiment completed successfully. Starting post-processing steps.\n\n\r", + "run_metrics": [ + { + "categories": [ + 0 + ], + "name": "learning_rate", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 0.001 + ] + } + ] + }, + { + "categories": [ + 0 + ], + "name": "minibatch_size", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 64 + ] + } + ] + }, + { + "categories": [ + 0 + ], + "name": "keep_probability", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 0.5 + ] + } + ] + }, + { + "categories": [ + 0 + ], + "name": "num_iterations", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 1000 + ] + } + ] + }, + { + "categories": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "name": "Accuracy", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "uid": "5c86a1", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0.0957999974489212, + 0.9460999965667725, + 0.9593999981880188, + 0.9718999862670898, + 0.9786999821662903, + 0.9778000116348267, + 0.9805999994277954, + 0.9787999987602234, + 0.9818999767303467, + 0.9818000197410583, + 0.9836000204086304 + ] + } + ] + }, + { + "categories": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "name": "Iterations", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "uid": "8867c0", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "y": [ + 0, + 100, + 200, + 300, + 400, + 500, + 600, + 700, + 800, + 900, + 1000 + ] + } + ] + } + ], + "run_properties": { + "additional_properties": {}, + "created_utc": "2018-06-01T19:06:18.159119", + "description": null, + "end_time_utc": 
"2018-06-01T19:21:08.302609", + "experiment_id": "54fc7a8b-21a4-4a10-8931-bd36c717c9b7", + "heartbeat_enabled": false, + "hidden": false, + "name": null, + "parent_run_id": null, + "properties": { + "Arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000", + "ContentSnapshotId": "67acb36c-e77e-44ae-b820-e6242b9909ab", + "azureml.runsource": "experiment" + }, + "root_run_id": "tensorflow-hyperdrive_1527879977658", + "run_id": "tensorflow-hyperdrive_1527879977658", + "run_number": 1, + "script_name": "tf_mnist_train.py", + "start_time_utc": "2018-06-01T19:19:31.364278", + "status": "Completed", + "tags": {}, + "target": "gpucluster", + "token": null, + "token_expiry_time_utc": null, + "user_id": "fb7d2bbf-2c54-46d7-8775-7e318644dd6b" + }, + "status": "Completed", + "workbench_run_details_uri": "https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-notebooks%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2ws3/projects/tensorflow-hyperdrive/run-history/run-details/tensorflow-hyperdrive_1527879977658?type=user" + } + } + }, + "8a36279a14624bbdb1926c2572748861": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "bc94f0e90ff64d62a1ff1f84bc34803b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "cce84fe5e64141a0a69e0029ca3553a5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "d7a8a4fc54e4453fbc31d11604742430": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": 
"DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_contrib_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowRunDetailsView", + "layout": "IPY_MODEL_49be30037a73481a900026b30fc816eb", + "value": { + "child_runs": [], + "children_metrics": {}, + "run_id": "tensorflow-hyperdrive_1527879977658", + "run_logs": "Uploading experiment status to history service.\nAdding run profile attachment azureml-logs/80_driver_log.txt\nUploading experiment status to history service.\nAdding run profile attachment azureml-logs/60_control_log.txt\n\n\rUsing Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\nSetting default log level to \"WARN\".\nTo adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n18/06/01 19:20:28 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n/azureml-envs/azureml_799f97443dc957270fd0268d825cda62/lib/python3.6/importlib/_bootstrap.py:205: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6\n return f(*args, **kwds)\nSuccessfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.\nExtracting MNIST_data/train-images-idx3-ubyte.gz\nSuccessfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.\nExtracting MNIST_data/train-labels-idx1-ubyte.gz\nSuccessfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.\nExtracting MNIST_data/t10k-images-idx3-ubyte.gz\nSuccessfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.\nExtracting MNIST_data/t10k-labels-idx1-ubyte.gz\n2018-06-01 19:20:33.459019: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA\n2018-06-01 19:20:33.674933: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1030] Found device 0 with properties: \nname: Tesla K80 
major: 3 minor: 7 memoryClockRate(GHz): 0.8235\npciBusID: 813c:00:00.0\ntotalMemory: 11.17GiB freeMemory: 11.09GiB\n2018-06-01 19:20:33.674977: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1120] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: Tesla K80, pci bus id: 813c:00:00.0, compute capability: 3.7)\nstep 0, training accuracy 0.09375, test accuracy, 0.0958\nstep 100, training accuracy 0.953125, test accuracy, 0.9461\nstep 200, training accuracy 0.953125, test accuracy, 0.9594\nstep 300, training accuracy 0.953125, test accuracy, 0.9719\nstep 400, training accuracy 0.984375, test accuracy, 0.9787\nstep 500, training accuracy 0.96875, test accuracy, 0.9778\nstep 600, training accuracy 0.984375, test accuracy, 0.9806\nstep 700, training accuracy 0.96875, test accuracy, 0.9788\nstep 800, training accuracy 0.96875, test accuracy, 0.9819\nstep 900, training accuracy 0.984375, test accuracy, 0.9818\ntest accuracy 0.9836\nThe experiment completed successfully. Starting post-processing steps.\n\n\r", + "run_metrics": [ + { + "categories": [ + 0 + ], + "name": "learning_rate", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 0.001 + ] + } + ] + }, + { + "categories": [ + 0 + ], + "name": "minibatch_size", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 64 + ] + } + ] + }, + { + "categories": [ + 0 + ], + "name": "keep_probability", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 0.5 + ] + } + ] + }, + { + "categories": [ + 0 + ], + "name": "num_iterations", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 1000 + ] + } + ] + }, + { + "categories": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "name": "Accuracy", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 0.0957999974489212, + 0.9460999965667725, + 0.9593999981880188, + 0.9718999862670898, + 
0.9786999821662903, + 0.9778000116348267, + 0.9805999994277954, + 0.9787999987602234, + 0.9818999767303467, + 0.9818000197410583, + 0.9836000204086304 + ] + } + ] + }, + { + "categories": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "name": "Iterations", + "run_id": "tensorflow-hyperdrive_1527879977658", + "series": [ + { + "data": [ + 0, + 100, + 200, + 300, + 400, + 500, + 600, + 700, + 800, + 900, + 1000 + ] + } + ] + } + ], + "run_properties": { + "additional_properties": {}, + "created_utc": "2018-06-01T19:06:18.159119", + "description": null, + "end_time_utc": "2018-06-01T19:21:08.302609", + "experiment_id": "54fc7a8b-21a4-4a10-8931-bd36c717c9b7", + "heartbeat_enabled": false, + "hidden": false, + "name": null, + "parent_run_id": null, + "properties": { + "Arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000", + "ContentSnapshotId": "67acb36c-e77e-44ae-b820-e6242b9909ab", + "azureml.runsource": "experiment" + }, + "root_run_id": "tensorflow-hyperdrive_1527879977658", + "run_id": "tensorflow-hyperdrive_1527879977658", + "run_number": 1, + "script_name": "tf_mnist_train.py", + "start_time_utc": "2018-06-01T19:19:31.364278", + "status": "Completed", + "tags": {}, + "target": "gpucluster", + "token": null, + "token_expiry_time_utc": null, + "user_id": "fb7d2bbf-2c54-46d7-8775-7e318644dd6b" + }, + "status": "Completed", + "workbench_run_details_uri": "https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-notebooks%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2ws3/projects/tensorflow-hyperdrive/run-history/run-details/tensorflow-hyperdrive_1527879977658?type=user" + } + } + }, + "fd3e0717dec9444b881194f135b82853": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_contrib_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowHyperDriveRunsView", + "layout": "IPY_MODEL_bc94f0e90ff64d62a1ff1f84bc34803b", + "value": { + "child_runs": [ + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000852395056049516 --keep_probability 0.434530370965995", + "created_time": "2018-06-01 19:41:39.666220+00:00", + "created_time_dt": "2018-06-01T19:41:39.666220", + "duration": "0:00:57", + "end_time": "2018-06-01 19:42:37.511351+00:00", + "hyperdrive_id": "5488", + "metric": 0.9842000007629395, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5488_abbecb6c", + "run_number": 16, + "start_time": "2018-06-01 19:41:40.368621+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00179999057463703 --keep_probability 0.296515321882523", + "created_time": "2018-06-01 19:29:46.303636+00:00", + "created_time_dt": "2018-06-01T19:29:46.303636", + "duration": "0:02:03", + "end_time": "2018-06-01 19:31:50.043486+00:00", + "hyperdrive_id": "5469", + "metric": 0.9836999773979187, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5469_9f034e69", + "run_number": 10, + "start_time": "2018-06-01 19:29:47.033264+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i 
DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000676904386677712 --keep_probability 0.4154535083569", + "created_time": "2018-06-01 19:50:31.651044+00:00", + "created_time_dt": "2018-06-01T19:50:31.651044", + "duration": "0:14:30", + "end_time": "2018-06-01 20:05:02.591649+00:00", + "hyperdrive_id": "5498", + "metric": 0.9828000068664551, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5498_32e0a249", + "run_number": 20, + "start_time": "2018-06-01 19:50:37.386350+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000586938713321222 --keep_probability 0.432942295536284", + "created_time": "2018-06-01 19:37:36.678691+00:00", + "created_time_dt": "2018-06-01T19:37:36.678691", + "duration": "0:01:46", + "end_time": "2018-06-01 19:39:23.211000+00:00", + "hyperdrive_id": "5479", + "metric": 0.9818000197410583, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5479_cb5037ed", + "run_number": 11, + "start_time": "2018-06-01 19:37:43.143211+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000321696353537414 --keep_probability 0.446837800410634", + "created_time": "2018-06-01 19:41:39.915872+00:00", + "created_time_dt": "2018-06-01T19:41:39.915872", + "duration": "0:02:58", + "end_time": "2018-06-01 19:44:38.693923+00:00", + "hyperdrive_id": "5490", + "metric": 
0.9812999963760376, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5490_cfcbcea1", + "run_number": 17, + "start_time": "2018-06-01 19:41:40.688804+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000598930751146987 --keep_probability 0.173175740602207", + "created_time": "2018-06-01 19:41:44.682554+00:00", + "created_time_dt": "2018-06-01T19:41:44.682554", + "duration": "0:01:54", + "end_time": "2018-06-01 19:43:38.690104+00:00", + "hyperdrive_id": "5491", + "metric": 0.9785000085830688, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5491_1ab60563", + "run_number": 18, + "start_time": "2018-06-01 19:41:45.356160+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00313856224079023 --keep_probability 0.308708329651949", + "created_time": "2018-06-01 19:29:46.140940+00:00", + "created_time_dt": "2018-06-01T19:29:46.140940", + "duration": "0:02:02", + "end_time": "2018-06-01 19:31:49.127224+00:00", + "hyperdrive_id": "5471", + "metric": 0.9764000177383423, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5471_05cdc17b", + "run_number": 8, + "start_time": "2018-06-01 19:29:46.876362+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 
--learning_rate 0.000321079619657115 --keep_probability 0.166071686996525", + "created_time": "2018-06-01 19:26:11.468523+00:00", + "created_time_dt": "2018-06-01T19:26:11.468523", + "duration": "0:01:48", + "end_time": "2018-06-01 19:28:00.170666+00:00", + "hyperdrive_id": "5460", + "metric": 0.9703999757766724, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5460_0ff67ff3", + "run_number": 6, + "start_time": "2018-06-01 19:26:12.172473+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00970484525511844 --keep_probability 0.334371206847485", + "created_time": "2018-06-01 19:25:53.815492+00:00", + "created_time_dt": "2018-06-01T19:25:53.815492", + "duration": "0:02:07", + "end_time": "2018-06-01 19:28:01.507944+00:00", + "hyperdrive_id": "5457", + "metric": 0.968500018119812, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5457_a4c3a147", + "run_number": 4, + "start_time": "2018-06-01 19:26:08.553859+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00922621594789716 --keep_probability 0.227683838955561", + "created_time": "2018-06-01 19:37:36.835723+00:00", + "created_time_dt": "2018-06-01T19:37:36.835723", + "duration": "0:01:03", + "end_time": "2018-06-01 19:38:40.652773+00:00", + "hyperdrive_id": "5480", + "metric": 0.9663000106811523, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5480_f234fb08", + "run_number": 14, + "start_time": "2018-06-01 19:37:38.116439+00:00", + "status": 
"Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0155645426732787 --keep_probability 0.159123698168668", + "created_time": "2018-06-01 19:29:46.277531+00:00", + "created_time_dt": "2018-06-01T19:29:46.277531", + "duration": "0:01:09", + "end_time": "2018-06-01 19:30:55.727701+00:00", + "hyperdrive_id": "5472", + "metric": 0.964900016784668, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5472_23122c4b", + "run_number": 9, + "start_time": "2018-06-01 19:29:46.964148+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00838536088211458 --keep_probability 0.102478957268164", + "created_time": "2018-06-01 19:25:53.548553+00:00", + "created_time_dt": "2018-06-01T19:25:53.548553", + "duration": "0:01:05", + "end_time": "2018-06-01 19:26:59.136632+00:00", + "hyperdrive_id": "5459", + "metric": 0.9646999835968018, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5459_030491ad", + "run_number": 3, + "start_time": "2018-06-01 19:25:54.739654+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000143958552086584 --keep_probability 0.273084377226789", + "created_time": "2018-06-01 19:29:46.057879+00:00", + "created_time_dt": 
"2018-06-01T19:29:46.057879", + "duration": "0:01:18", + "end_time": "2018-06-01 19:31:04.843202+00:00", + "hyperdrive_id": "5470", + "metric": 0.963699996471405, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5470_a648dbea", + "run_number": 7, + "start_time": "2018-06-01 19:29:46.780201+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 7.14051833348127E-05 --keep_probability 0.472685817381368", + "created_time": "2018-06-01 19:41:39.648602+00:00", + "created_time_dt": "2018-06-01T19:41:39.648602", + "duration": "0:03:06", + "end_time": "2018-06-01 19:44:45.699811+00:00", + "hyperdrive_id": "5489", + "metric": 0.9613000154495239, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5489_38907948", + "run_number": 15, + "start_time": "2018-06-01 19:41:40.512369+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00737747352627753 --keep_probability 0.205239625544216", + "created_time": "2018-06-01 19:50:33.596963+00:00", + "created_time_dt": "2018-06-01T19:50:33.596963", + "duration": "0:01:51", + "end_time": "2018-06-01 19:52:25.281499+00:00", + "hyperdrive_id": "5497", + "metric": 0.9581999778747559, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5497_8130025b", + "run_number": 22, + "start_time": "2018-06-01 19:50:34.456850+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i 
DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0211316024512922 --keep_probability 0.456008246140918", + "created_time": "2018-06-01 19:50:31.581841+00:00", + "created_time_dt": "2018-06-01T19:50:31.581841", + "duration": "0:01:03", + "end_time": "2018-06-01 19:51:35.272415+00:00", + "hyperdrive_id": "5499", + "metric": 0.9580000042915344, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5499_e0b5a73f", + "run_number": 19, + "start_time": "2018-06-01 19:50:32.786951+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 7.56451710371043E-05 --keep_probability 0.321364540919092", + "created_time": "2018-06-01 19:50:33.421674+00:00", + "created_time_dt": "2018-06-01T19:50:33.421674", + "duration": "0:06:27", + "end_time": "2018-06-01 19:57:00.982688+00:00", + "hyperdrive_id": "5496", + "metric": 0.9520000219345093, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5496_46a98c1f", + "run_number": 21, + "start_time": "2018-06-01 19:50:34.379782+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 6.86923046964849E-05 --keep_probability 0.229123758955098", + "created_time": "2018-06-01 19:37:36.816510+00:00", + "created_time_dt": "2018-06-01T19:37:36.816510", + "duration": "0:01:12", + "end_time": "2018-06-01 19:38:49.439465+00:00", + "hyperdrive_id": "5477", + "metric": 
0.9483000040054321, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5477_c428bcf0", + "run_number": 13, + "start_time": "2018-06-01 19:37:42.971387+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.014609502490554 --keep_probability 0.480459935106515", + "created_time": "2018-06-01 19:26:10.258955+00:00", + "created_time_dt": "2018-06-01T19:26:10.258955", + "duration": "0:02:41", + "end_time": "2018-06-01 19:28:52.069673+00:00", + "hyperdrive_id": "5458", + "metric": 0.12110000103712082, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5458_3f73f0ac", + "run_number": 5, + "start_time": "2018-06-01 19:26:17.107379+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0149932664638274 --keep_probability 0.284424630578217", + "created_time": "2018-06-01 19:37:36.730460+00:00", + "created_time_dt": "2018-06-01T19:37:36.730460", + "duration": "0:01:08", + "end_time": "2018-06-01 19:38:44.881339+00:00", + "hyperdrive_id": "5478", + "metric": 0.11349999904632568, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5478_24390740", + "run_number": 12, + "start_time": "2018-06-01 19:37:42.865594+00:00", + "status": "Completed" + } + ], + "children_metrics": { + "categories": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "metricName": "Accuracy", + "series": [ + { + "data": [ + 0.11159999668598175, + 0.8949000239372253, + 0.9286999702453613, + 0.9498999714851379, + 0.9539999961853027, + 
0.953499972820282, + 0.9606999754905701, + 0.9613000154495239, + 0.9549999833106995, + 0.9646999835968018, + 0.9581999778747559 + ], + "mode": "lines", + "name": 3, + "run_id": 3, + "stepped": false + }, + { + "data": [ + 0.10109999775886536, + 0.8985999822616577, + 0.9424999952316284, + 0.9294999837875366, + 0.9545999765396118, + 0.9581000208854675, + 0.9616000056266785, + 0.9678999781608582, + 0.9661999940872192, + 0.9672999978065491, + 0.968500018119812 + ], + "mode": "lines", + "name": 4, + "run_id": 4, + "stepped": false + }, + { + "data": [ + 0.08980000019073486, + 0.8791999816894531, + 0.9261000156402588, + 0.9426000118255615, + 0.9501000046730042, + 0.9546999931335449, + 0.9573000073432922, + 0.963699996471405, + 0.965499997138977, + 0.9684000015258789, + 0.9703999757766724 + ], + "mode": "lines", + "name": 6, + "run_id": 6, + "stepped": false + }, + { + "data": [ + 0.12110000103712082, + 0.0982000008225441, + 0.11349999904632568, + 0.11349999904632568, + 0.10320000350475311, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568 + ], + "mode": "lines", + "name": 5, + "run_id": 5, + "stepped": false + }, + { + "data": [ + 0.14800000190734863, + 0.9146999716758728, + 0.9452999830245972, + 0.9506000280380249, + 0.9550999999046326, + 0.9584000110626221, + 0.9599000215530396, + 0.9621000289916992, + 0.964900016784668, + 0.9510999917984009, + 0.9624000191688538 + ], + "mode": "lines", + "name": 9, + "run_id": 9, + "stepped": false + }, + { + "data": [ + 0.08749999850988388, + 0.855400025844574, + 0.911300003528595, + 0.9289000034332275, + 0.9394999742507935, + 0.9431999921798706, + 0.9509999752044678, + 0.9553999900817871, + 0.9555000066757202, + 0.963699996471405, + 0.9616000056266785 + ], + "mode": "lines", + "name": 7, + "run_id": 7, + "stepped": false + }, + { + "data": [ + 0.11209999769926071, + 0.9466000199317932, + 0.9639000296592712, + 0.9722999930381775, + 
0.9781000018119812, + 0.9800999760627747, + 0.9781000018119812, + 0.9814000129699707, + 0.9833999872207642, + 0.9836999773979187, + 0.9829000234603882 + ], + "mode": "lines", + "name": 10, + "run_id": 10, + "stepped": false + }, + { + "data": [ + 0.0949999988079071, + 0.8953999876976013, + 0.942300021648407, + 0.9513999819755554, + 0.9617000222206116, + 0.9564999938011169, + 0.9689000248908997, + 0.9688000082969666, + 0.9704999923706055, + 0.9760000109672546, + 0.9764000177383423 + ], + "mode": "lines", + "name": 8, + "run_id": 8, + "stepped": false + }, + { + "data": [ + 0.08950000256299973, + 0.9248999953269958, + 0.9545999765396118, + 0.9581000208854675, + 0.9559999704360962, + 0.9627000093460083, + 0.9642000198364258, + 0.9663000106811523, + 0.9585999846458435, + 0.963100016117096, + 0.9595999717712402 + ], + "mode": "lines", + "name": 14, + "run_id": 14, + "stepped": false + }, + { + "data": [ + 0.09629999846220016, + 0.11349999904632568, + 0.10279999673366547, + 0.10090000182390213, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.10279999673366547, + 0.10279999673366547, + 0.11349999904632568, + 0.11349999904632568 + ], + "mode": "lines", + "name": 12, + "run_id": 12, + "stepped": false + }, + { + "data": [ + 0.13899999856948853, + 0.7547000050544739, + 0.8593000173568726, + 0.8981999754905701, + 0.9186999797821045, + 0.9254999756813049, + 0.934499979019165, + 0.9377999901771545, + 0.9431999921798706, + 0.9437999725341797, + 0.9483000040054321 + ], + "mode": "lines", + "name": 13, + "run_id": 13, + "stepped": false + }, + { + "data": [ + 0.10440000146627426, + 0.9383000135421753, + 0.9538999795913696, + 0.9664000272750854, + 0.9717000126838684, + 0.9760000109672546, + 0.9732999801635742, + 0.9779000282287598, + 0.9817000031471252, + 0.9818000197410583, + 0.9799000024795532 + ], + "mode": "lines", + "name": 11, + "run_id": 11, + "stepped": false + }, + { + "data": [ + 0.08869999647140503, + 0.9404000043869019, + 0.9574000239372253, + 
0.9700999855995178, + 0.9715999960899353, + 0.979200005531311, + 0.9797000288963318, + 0.9812999963760376, + 0.9797999858856201, + 0.9817000031471252, + 0.9842000007629395 + ], + "mode": "lines", + "name": 16, + "run_id": 16, + "stepped": false + }, + { + "data": [ + 0.12210000306367874, + 0.917900025844574, + 0.9437999725341797, + 0.95660001039505, + 0.9646000266075134, + 0.9692000150680542, + 0.9707000255584717, + 0.9735999703407288, + 0.9781000018119812, + 0.9775999784469604, + 0.9785000085830688 + ], + "mode": "lines", + "name": 18, + "run_id": 18, + "stepped": false + }, + { + "data": [ + 0.11159999668598175, + 0.8270000219345093, + 0.9024999737739563, + 0.9175999760627747, + 0.9323999881744385, + 0.9368000030517578, + 0.9431999921798706, + 0.9506000280380249, + 0.9526000022888184, + 0.9584000110626221, + 0.9613000154495239 + ], + "mode": "lines", + "name": 15, + "run_id": 15, + "stepped": false + }, + { + "data": [ + 0.16419999301433563, + 0.9247999787330627, + 0.9431999921798706, + 0.9599000215530396, + 0.9674999713897705, + 0.9695000052452087, + 0.9765999913215637, + 0.9763000011444092, + 0.9779999852180481, + 0.9812999963760376, + 0.9789000153541565 + ], + "mode": "lines", + "name": 17, + "run_id": 17, + "stepped": false + }, + { + "data": [ + 0.1160999983549118, + 0.9366000294685364, + 0.9473000168800354, + 0.9552000164985657, + 0.95660001039505, + 0.9465000033378601, + 0.9575999975204468, + 0.9580000042915344, + 0.949999988079071, + 0.9520999789237976, + 0.9567999839782715 + ], + "mode": "lines", + "name": 19, + "run_id": 19, + "stepped": false + }, + { + "data": [ + 0.0949999988079071, + 0.8639000058174133, + 0.9139999747276306, + 0.9171000123023987, + 0.930899977684021, + 0.9441999793052673, + 0.9545999765396118, + 0.9560999870300293, + 0.9581999778747559, + 0.9539999961853027, + 0.9559000134468079 + ], + "mode": "lines", + "name": 22, + "run_id": 22, + "stepped": false + }, + { + "data": [ + 0.11500000208616257, + 0.789900004863739, + 
0.8838000297546387, + 0.9072999954223633, + 0.9203000068664551, + 0.9312000274658203, + 0.9319000244140625, + 0.9434000253677368, + 0.9470999836921692, + 0.9477999806404114, + 0.9520000219345093 + ], + "mode": "lines", + "name": 21, + "run_id": 21, + "stepped": false + }, + { + "data": [ + 0.052000001072883606, + 0.9318000078201294, + 0.9584000110626221, + 0.9639999866485596, + 0.9710999727249146, + 0.9746999740600586, + 0.9768999814987183, + 0.9822999835014343, + 0.978600025177002, + 0.9801999926567078, + 0.9828000068664551 + ], + "mode": "lines", + "name": 20, + "run_id": 20, + "stepped": false + } + ], + "showLegend": true + }, + "run_id": "tensorflow-hyperdrive_1527881081325", + "run_logs": "", + "run_metrics": [], + "run_properties": { + "additional_properties": {}, + "created_utc": "2018-06-01T19:24:41.846775", + "description": null, + "end_time_utc": "2018-06-01T20:05:15.398835", + "experiment_id": "54fc7a8b-21a4-4a10-8931-bd36c717c9b7", + "heartbeat_enabled": false, + "hidden": false, + "name": "tensorflow-hyperdrive", + "parent_run_id": null, + "properties": { + "all_jobs_generated": "true", + "azureml.runsource": "hyperdrive", + "cancellation_requested": "false", + "generator_config": "{\"name\": \"RANDOM\", \"parameter_space\": {\"learning_rate\": [\"loguniform\", [-10, -3]], \"keep_probability\": [\"uniform\", [0.5, 0.1]]}}", + "is_hyperdrive_run": "true", + "max_concurrent_jobs": "4", + "max_duration_minutes": "43200", + "max_total_jobs": "20", + "policy_config": "{\"name\": \"BANDIT\", \"properties\": {\"slack_factor\": 0.15, \"evaluation_interval\": 2, \"delay_evaluation\": 0}}", + "primary_metric_config": "{\"name\": \"Accuracy\", \"goal\": \"maximize\"}", + "runTemplate": "HyperDrive" + }, + "root_run_id": "tensorflow-hyperdrive_1527881081325", + "run_id": "tensorflow-hyperdrive_1527881081325", + "run_number": 2, + "script_name": "tf_mnist_train.py", + "start_time_utc": null, + "status": "Completed", + "tags": {}, + "target": "gpucluster", + 
"token": null, + "token_expiry_time_utc": null, + "user_id": "fffc1c66-275f-4935-bb04-70a760c82fda" + }, + "status": "Completed", + "workbench_run_details_uri": "https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-notebooks%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2ws3/projects/tensorflow-hyperdrive/run-history/run-details/tensorflow-hyperdrive_1527881081325?type=HyperDrive" + } + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb b/01.getting-started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb similarity index 99% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb rename to 01.getting-started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb index 619643135..d749b57d4 100644 --- a/00.Getting Started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb +++ b/01.getting-started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb @@ -588,9 +588,8 @@ "metadata": {}, "outputs": [], "source": [ - "# these 2 cells to be replaced with Python calls once DCR 246363 has been completed\n", "# check cluster status, pay attention to the # of running nodes\n", - "!az batchai cluster list -o table" + "# !az batchai cluster list -o table" ] }, { @@ -601,7 +600,7 @@ "source": [ "# check the Batch AI job queue. Notice the Job name is the run history ID. \n", "# Pay attention to the state of the job.\n", - "!az batchai job list -o table" + "# !az batchai job list -o table" ] }, { @@ -613,6 +612,15 @@ "Once all of the HyperDrive runs have completed, we can find the run that achieved the highest accuracy and its corresponding hyperparameters." 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_1.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_1.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_1.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_1.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_10.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_10.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_10.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_10.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_2.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_2.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_2.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_2.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_3.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_3.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_3.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_3.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_4.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_4.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_4.jpg rename to 
01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_4.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_5.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_5.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_5.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_5.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_6.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_6.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_6.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_6.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_7.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_7.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_7.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_7.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_8.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_8.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_8.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_8.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_9.jpg b/01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_9.jpg similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_9.jpg rename to 01.getting-started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_9.jpg diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_score.py 
b/01.getting-started/08.hyperdrive-with-TensorFlow/tf_mnist_score.py similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_score.py rename to 01.getting-started/08.hyperdrive-with-TensorFlow/tf_mnist_score.py diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_train.py b/01.getting-started/08.hyperdrive-with-TensorFlow/tf_mnist_train.py similarity index 100% rename from 00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_train.py rename to 01.getting-started/08.hyperdrive-with-TensorFlow/tf_mnist_train.py diff --git a/00.Getting Started/10.register-model-create-image-deploy-service/10.register-model-create-image-deploy-service.ipynb b/01.getting-started/10.register-model-create-image-deploy-service/10.register-model-create-image-deploy-service.ipynb similarity index 100% rename from 00.Getting Started/10.register-model-create-image-deploy-service/10.register-model-create-image-deploy-service.ipynb rename to 01.getting-started/10.register-model-create-image-deploy-service/10.register-model-create-image-deploy-service.ipynb diff --git a/00.Getting Started/10.register-model-create-image-deploy-service/sklearn_regression_model.pkl b/01.getting-started/10.register-model-create-image-deploy-service/sklearn_regression_model.pkl similarity index 100% rename from 00.Getting Started/10.register-model-create-image-deploy-service/sklearn_regression_model.pkl rename to 01.getting-started/10.register-model-create-image-deploy-service/sklearn_regression_model.pkl diff --git a/00.Getting Started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb b/01.getting-started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb similarity index 100% rename from 00.Getting Started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb rename to 01.getting-started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb diff --git a/00.Getting 
Started/11.production-deploy-to-aks/sklearn_regression_model.pkl b/01.getting-started/11.production-deploy-to-aks/sklearn_regression_model.pkl similarity index 100% rename from 00.Getting Started/11.production-deploy-to-aks/sklearn_regression_model.pkl rename to 01.getting-started/11.production-deploy-to-aks/sklearn_regression_model.pkl diff --git a/00.Getting Started/12.enable-data-collection-for-models-in-aks/12.enable-data-collection-for-models-in-aks.ipynb b/01.getting-started/12.enable-data-collection-for-models-in-aks/12.enable-data-collection-for-models-in-aks.ipynb similarity index 100% rename from 00.Getting Started/12.enable-data-collection-for-models-in-aks/12.enable-data-collection-for-models-in-aks.ipynb rename to 01.getting-started/12.enable-data-collection-for-models-in-aks/12.enable-data-collection-for-models-in-aks.ipynb diff --git a/00.Getting Started/12.enable-data-collection-for-models-in-aks/sklearn_regression_model.pkl b/01.getting-started/12.enable-data-collection-for-models-in-aks/sklearn_regression_model.pkl similarity index 100% rename from 00.Getting Started/12.enable-data-collection-for-models-in-aks/sklearn_regression_model.pkl rename to 01.getting-started/12.enable-data-collection-for-models-in-aks/sklearn_regression_model.pkl diff --git a/automl/00.configuration.ipynb b/automl/00.configuration.ipynb new file mode 100644 index 000000000..752001b18 --- /dev/null +++ b/automl/00.configuration.ipynb @@ -0,0 +1,288 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 00. configuration\n", + "\n", + "In this example you will create an Azure Machine Learning Workspace and initialize your notebook directory to easily use this workspace. 
Typically you will only need to run this once per notebook directory, and all other notebooks in this directory or any sub-directories will automatically use the settings you indicate here.\n", + "\n", + "\n", + "## Prerequisites:\n", + "\n", + "Before running this notebook, run the automl_setup script described in README.md.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Connect to your Azure Subscription\n", + "\n", + "In order to use an AML Workspace, first you need access to an Azure Subscription. You can [create your own](https://azure.microsoft.com/en-us/free/) or get your existing subscription information from the [Azure portal](https://portal.azure.com).\n", + "\n", + "First, log in to Azure and follow the prompts to authenticate. Then check that your subscription is correct." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!az login" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!az account show" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you have multiple subscriptions and need to change the active one, you can use the following command:\n", + "```shell\n", + "az account set -s <subscription_id>\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register Machine Learning Services Resource Provider\n", + "\n", + "This step is required to use the Azure ML services backing the SDK." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# register the new RP\n", + "!az provider register -n Microsoft.MachineLearningServices\n", + "\n", + "# check the registration status\n", + "!az provider show -n Microsoft.MachineLearningServices" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check core SDK version number to validate your installation and for debugging purposes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "\n", + "print(\"SDK Version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize an Azure ML Workspace\n", + "### What is an Azure ML Workspace and why do I need one?\n", + "\n", + "An AML Workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an AML Workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, operationalization, and the monitoring of operationalized models.\n", + "\n", + "\n", + "### What do I need\n", + "\n", + "To create or access an Azure ML Workspace, you will need to import the AML library and specify the following information:\n", + "* A name for your workspace. You can choose one.\n", + "* Your subscription id. Use the *id* value from the *az account show* output above. \n", + "* The resource group name. Resource group organizes Azure resources and provides default region for the resources in the group. You can either specify a new one, in which case it gets created for your Workspace, or use an existing one or create a new one from [Azure portal](https://portal.azure.com)\n", + "\n", + "Please specify these values below.
For workspace region, we prefer you use `eastus2` or `eastus2euap` (only if you have access to EUAP) for most scenarios. Other supported regions include `westcentralus`, `southeastasia`, `westeurope`, `australiaeast`, although their support might lag behind `eastus2` and `eastus2euap`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "subscription_id = \"\"\n", + "resource_group = \"myrg\"\n", + "workspace_name = \"myws\"\n", + "workspace_region = \"eastus2\" # or eastus2euap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating a workspace\n", + "If you already have access to an AML Workspace you want to use, you can skip this cell. Otherwise, this cell will create an AML workspace for you in a subscription provided you have the correct permissions for the given `subscription_id`.\n", + "\n", + "This will fail when:\n", + "1. The workspace already exists\n", + "2. You do not have permission to create a workspace in the resource group\n", + "3. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription\n", + "\n", + "If workspace creation fails for any reason other than already existing, please work with your IT admin to provide you with the appropriate permissions or to provision the required resources.\n", + "\n", + "**Note** The workspace creation can take several minutes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import the Workspace class and check the azureml SDK version\n", + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.create(name = workspace_name,\n", + " subscription_id = subscription_id,\n", + " resource_group = resource_group, \n", + " location = workspace_region)\n", + "ws.get_details()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configuring your local environment\n", + "You can validate that you have access to the specified workspace and write a configuration file to the default configuration location, `./aml_config/config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace(workspace_name = workspace_name,\n", + " subscription_id = subscription_id,\n", + " resource_group = resource_group)\n", + "\n", + "# persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n", + "ws.write_config()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can then load the workspace from this config file from any notebook in the current directory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load workspace configuration from the ./aml_config/config.json file.\n", + "my_workspace = Workspace.from_config()\n", + "my_workspace.get_details()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a folder to host all sample projects\n", + "Lastly, create a folder where all the sample projects will be hosted." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "sample_projects_folder = './sample_projects'\n", + "\n", + "if not os.path.isdir(sample_projects_folder):\n", + " os.mkdir(sample_projects_folder)\n", + " \n", + "print('Sample projects will be created in {}.'.format(sample_projects_folder))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install additional packages for demo notebooks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install pandas_ml\n", + "!pip install seaborn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Success!\n", + "Great, you are ready to move on to the rest of the sample notebooks." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/01.auto-ml-classification.ipynb b/automl/01.auto-ml-classification.ipynb new file mode 100644 index 000000000..3529c3ba0 --- /dev/null +++ b/automl/01.auto-ml-classification.ipynb @@ -0,0 +1,406 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 01: Classification with local compute\n", + "\n", + "In this example we use scikit-learn's [digits dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use the AutoML Classifier for a simple classification problem.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) notebook before running this notebook.\n", + "\n", + "In this notebook you will see:\n", + "1. Creating an Experiment in an existing Workspace\n", + "2. Instantiating AutoMLConfig\n", + "3. Training the Model using local compute\n", + "4. Exploring the results\n", + "5. Testing the fitted model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. For AutoML you need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for experiment\n", + "experiment_name = 'automl-local-classification'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-local-classification'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data = output, index = ['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 
Load Digits Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import datasets\n", + "\n", + "digits = datasets.load_digits()\n", + "\n", + "# only take the first 100 rows if you want the training steps to run faster\n", + "#X_digits = digits.data[:100,:]\n", + "#y_digits = digits.target[:100]\n", + "\n", + "# use full dataset\n", + "X_digits = digits.data\n", + "y_digits = digits.target" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate Auto ML Config\n", + "\n", + "Instantiate an AutoMLConfig object. This defines the settings and data used to run the experiment.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**task**|classification or regression|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iteration|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", + "|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]
Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n", + "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " primary_metric = 'AUC_weighted',\n", + " max_time_sec = 12000,\n", + " iterations = 50,\n", + " n_cross_validations = 3,\n", + " verbosity = logging.INFO,\n", + " X = X_digits, \n", + " y = y_digits,\n", + " path=project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model\n", + "\n", + "You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for a while.\n", + "You will see the currently running iterations printing to the console." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Optionally, you can continue an interrupted local run by calling continue_experiment without the iterations parameter, or run more iterations to a completed run by specifying the iterations parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = local_run.continue_experiment(X = X_digits, \n", + " y = y_digits, \n", + " show_output = True,\n", + " iterations = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(local_run).show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(local_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + "\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = local_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric\n", + "Give me the run and the model that has the smallest `log_loss`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"log_loss\"\n", + "best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration\n", + "Give me the run and the model from the 3rd iteration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 3\n", + "best_run, fitted_model = local_run.get_output(iteration = iteration)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model \n", + "\n", + "#### Load Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_digits = digits.data[:10, :]\n", + "y_digits = digits.target[:10]\n", + "images = digits.images[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Testing our best pipeline\n", + "We will try to predict 2 digits and see how our model works." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Randomly select digits and test\n", + "for index in np.random.choice(len(y_digits), 2):\n", + " print(index)\n", + " predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n", + " label = y_digits[index]\n", + " title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/02.auto-ml-regression.ipynb b/automl/02.auto-ml-regression.ipynb new file mode 100644 index 000000000..f8814de63 --- /dev/null +++ b/automl/02.auto-ml-regression.ipynb @@ -0,0 +1,432 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 02: Regression with local compute\n", + "\n", + "In this example we use the scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use the AutoML for a simple regression problem.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. 
Creating an Experiment using an existing Workspace\n", + "2. Instantiating AutoMLConfig\n", + "3. Training the Model using local compute\n", + "4. Exploring the results\n", + "5. Testing the fitted model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for the experiment\n", + "experiment_name = 'automl-local-regression'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-local-regression'\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data = output, index = 
['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load diabetes dataset, a well-known built-in small dataset that comes with scikit-learn\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "X, y = load_diabetes(return_X_y = True)\n", + "\n", + "columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n", + "\n", + "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate Auto ML Config\n", + "\n", + "Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**task**|classification or regression|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Regression supports the following primary metrics
spearman_correlation
normalized_root_mean_squared_error
r2_score
normalized_mean_absolute_error
normalized_root_mean_squared_log_error|\n", + "|**max_time_sec**|Time limit in seconds for each iteration|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", + "|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]
Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n", + "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task='regression',\n", + " max_time_sec = 600,\n", + " iterations = 10,\n", + " primary_metric = 'spearman_correlation', \n", + " n_cross_validations = 5,\n", + " debug_log = 'automl.log',\n", + " verbosity = logging.INFO,\n", + " X = x_train, \n", + " y = y_train,\n", + " path=project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model\n", + "\n", + "You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for a while.\n", + "You will see the currently running iterations printing to the console." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration completes, then you will see an auto-updating graph and table show up. It refreshes once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(local_run).show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(local_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + " \n", + "import pandas as pd\n", + "import seaborn as sns\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = local_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric\n", + "Show the run and model that has the smallest `root_mean_squared_error` (which turned out to be the same as the one with largest `spearman_correlation` value):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"root_mean_squared_error\"\n", + "best_run, fitted_model = local_run.get_output(metric=lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration\n", + "Simply show the run and model from the 3rd iteration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 3\n", + "third_run, third_model = local_run.get_output(iteration = iteration)\n", + "print(third_run)\n", + "print(third_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "local_run.register_model(description = description, tags = tags)\n", + "print(local_run.model_id) # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Predict on training and test set, and calculate residual values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred_train = fitted_model.predict(x_train)\n", + "y_residual_train = y_train - y_pred_train\n", + "\n", + "y_pred_test = fitted_model.predict(x_test)\n", + "y_residual_test = y_test - y_pred_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn import datasets\n", + "from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + "# set up a multi-plot chart\n", + "f, (a0, a1) = plt.subplots(1, 2, gridspec_kw = {'width_ratios':[1, 1], 'wspace':0, 'hspace': 0})\n", + "f.suptitle('Regression Residual Values', fontsize = 18)\n", + "f.set_figheight(6)\n", + "f.set_figwidth(16)\n", + "\n", + "# plot residual values of training set\n", + "a0.axis([0, 360, -200, 200])\n", + "a0.plot(y_residual_train, 'bo', alpha = 0.5)\n", + "a0.plot([-10,360],[0,0], 'r-', lw = 3)\n", + "a0.text(16,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_train, y_pred_train))), fontsize = 12)\n", + "a0.text(16,140,'Variance = {0:.2f}'.format(r2_score(y_train, y_pred_train)), fontsize = 12)\n", + "a0.set_xlabel('Training samples', fontsize = 12)\n", + "a0.set_ylabel('Residual Values', fontsize = 12)\n", + "# plot histogram\n", + "a0.hist(y_residual_train, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step');\n", + "a0.hist(y_residual_train, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10);\n", + "\n", + "# plot residual values of test set\n", + "a1.axis([0, 90, -200, 200])\n", + "a1.plot(y_residual_test, 'bo', alpha = 0.5)\n", + "a1.plot([-10,360],[0,0], 'r-', lw = 3)\n", + "a1.text(5,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_test, y_pred_test))), fontsize = 12)\n", + "a1.text(5,140,'Variance = {0:.2f}'.format(r2_score(y_test, y_pred_test)), fontsize = 
12)\n", + "a1.set_xlabel('Test samples', fontsize = 12)\n", + "a1.set_yticklabels([])\n", + "# plot histogram\n", + "a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step');\n", + "a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10);\n", + "\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/03.auto-ml-remote-execution.ipynb b/automl/03.auto-ml-remote-execution.ipynb new file mode 100644 index 000000000..626bd57df --- /dev/null +++ b/automl/03.auto-ml-remote-execution.ipynb @@ -0,0 +1,530 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 03: Remote Execution using DSVM (Ubuntu)\n", + "\n", + "In this example we use the scikit learn's [digits dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use the AutoML Classifier for a simple classification problem.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Creating an Experiment using an existing Workspace\n", + "2. Attaching an existing DSVM to a workspace\n", + "3. Instantiating AutoMLConfig \n", + "4. Training the Model using the DSVM\n", + "5. Exploring the results\n", + "6. 
Testing the fitted model\n", + "\n", + "In addition this notebook showcases the following features\n", + "- **Parallel** Executions for iterations\n", + "- Asynchronous tracking of progress\n", + "- **Cancelling** individual iterations or the entire run\n", + "- Retrieving models for any iteration or logged metric\n", + "- Specifying automl settings as **kwargs**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for the run history container in the workspace\n", + "experiment_name = 'automl-remote-dsvm4'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-remote-dsvm4'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + 
"output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data = output, index = ['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Remote Linux DSVM\n", + "Note: If creation fails with a message about Marketplace purchase eligibility, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating the VM.\n", + "\n", + "**Note**: By default SSH runs on port 22 and you don't need to specify it. But if, for security reasons, you switch to a different port (such as 5022), you can append the port number to the address. [Read more](https://render.githubusercontent.com/documentation/sdk/ssh-issue.md) on this." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import DsvmCompute\n", + "\n", + "dsvm_name = 'mydsvm'\n", + "try:\n", + " dsvm_compute = DsvmCompute(ws, dsvm_name)\n", + " print('found existing dsvm.')\n", + "except:\n", + " print('creating new dsvm.')\n", + " dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n", + " dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n", + " dsvm_compute.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a RunConfiguration with DSVM name\n", + "Run the below code to tell the runconfiguration the name of your dsvm." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "\n", + "run_config = RunConfiguration()\n", + "run_config.target = dsvm_compute" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change index to use master packages\n", + "If you want to use master rather than preview run the below code. Once Public preview is launched we would not need this cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import CondaDependencies\n", + "\n", + "cd = CondaDependencies()\n", + "\n", + "cd.remove_pip_option(pip_option=\"--index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\")\n", + "cd.set_pip_index_url(index_url=\"--extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/master/588E708E0DF342C4A80BD954289657CF\")\n", + "\n", + "run_config.environment.python.conda_dependencies = cd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Get Data File\n", + "For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists(project_folder):\n", + " os.makedirs(project_folder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $project_folder/get_data.py\n", + "\n", + "from sklearn import datasets\n", + "from scipy import sparse\n", + "import numpy as np\n", + "\n", + "def get_data():\n", + " \n", + " digits = datasets.load_digits()\n", + " X_digits = digits.data\n", + " y_digits = digits.target\n", + "\n", + " return { \"X\" : X_digits, \"y\" : y_digits }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate AutoML \n", + "\n", + "You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() semantics for local executions too. 
\n", + "\n", + "Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iterations|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML Classifier trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_settings = {\n", + " \"max_time_sec\": 600,\n", + " \"iterations\": 20,\n", + " \"n_cross_validations\": 5,\n", + " \"primary_metric\": 'AUC_weighted',\n", + " \"preprocess\": False,\n", + " \"concurrent_iterations\": 2,\n", + " \"verbosity\": logging.INFO\n", + "}\n", + "\n", + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " path=project_folder,\n", + " run_configuration = run_config,\n", + " data_script = project_folder + \"./get_data.py\",\n", + " **automl_settings\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the Results\n", + "\n", + "#### Loading executed runs\n", + "In case you need to load a previously executed run given a run id please enable the below cell" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "remote_run = AutoMLRun(experiment=experiment, run_id='AutoML_480d3ed6-fc94-44aa-8f4e-0b945db9d3ef')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. 
It refreshed once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(remote_run).show() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# wait till the run finishes\n", + "remote_run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(remote_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + "\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Canceling runs\n", + "\n", + "You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cancel the ongoing experiment and stop scheduling new iterations\n", + "# 
remote_run.cancel()\n", + "\n", + "# Cancel iteration 1 and move onto iteration 2\n", + "# remote_run.cancel_iteration(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = remote_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric\n", + "Show the run/model which has the smallest `log_loss` value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"log_loss\"\n", + "best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration\n", + "Show the run and model from the 3rd iteration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 3\n", + "third_run, third_model = remote_run.get_output(iteration=iteration)\n", + "print(third_run)\n", + "print(third_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "remote_run.register_model(description=description, tags=tags)\n", + "remote_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model \n", + "\n", + "#### Load Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_digits = digits.data[:10, :]\n", + "y_digits = digits.target[:10]\n", + "images = digits.images[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Testing our best pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Randomly select digits and test\n", + "for index in np.random.choice(len(y_digits), 2):\n", + " print(index)\n", + " predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n", + " label = y_digits[index]\n", + " title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + 
"name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/03b.auto-ml-remote-batchai.ipynb b/automl/03b.auto-ml-remote-batchai.ipynb new file mode 100644 index 000000000..0104efbf2 --- /dev/null +++ b/automl/03b.auto-ml-remote-batchai.ipynb @@ -0,0 +1,609 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 03: Remote Execution using Batch AI\n", + "\n", + "In this example we use scikit-learn's [digits dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use the AutoML Classifier for a simple classification problem.\n", + "\n", + "Make sure you have executed the [setup](setup.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Creating an Experiment using an existing Workspace\n", + "2. Attaching an existing Batch AI compute to a workspace\n", + "3. Instantiating AutoMLConfig \n", + "4. Training the Model using the Batch AI\n", + "5. Exploring the results\n", + "6. Testing the fitted model\n", + "\n", + "In addition this notebook showcases the following features\n", + "- **Parallel** Executions for iterations\n", + "- Asynchronous tracking of progress\n", + "- **Cancelling** individual iterations or the entire run\n", + "- Retrieving models for any iteration or logged metric\n", + "- specify automl settings as **kwargs**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a workspace. 
For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for the run history container in the workspace\n", + "experiment_name = 'automl-remote-batchai'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-remote-batchai'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data = output, index = ['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import 
set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Batch AI Cluster\n", + "The cluster is created as Machine Learning Compute and will appear under your workspace.\n", + "\n", + "Note: The cluster creation can take over 10 minutes, be patient." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "# choose a name for your cluster\n", + "batchai_cluster_name = ws.name + \"cpu21\"\n", + "\n", + "found = False\n", + "# see if this compute target already exists in the workspace\n", + "for ct in ws.compute_targets():\n", + " print(ct.name, ct.type)\n", + " if (ct.name == batchai_cluster_name and ct.type == 'BatchAI'):\n", + " found = True\n", + " print('found compute target. just use it.')\n", + " compute_target = ct\n", + " break\n", + " \n", + "if not found:\n", + " print('creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # D2 is 2 cores\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = False,\n", + " cluster_min_nodes = 2, \n", + " cluster_max_nodes = 2)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws,batchai_cluster_name, provisioning_config)\n", + " \n", + " # can poll for a minimum number of nodes and for a specific timeout. 
\n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + " \n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + " print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a RunConfiguration with Batch AI name\n", + "Run the code below to tell the RunConfiguration the name of your Batch AI cluster." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "\n", + "run_config = RunConfiguration()\n", + "run_config.target = compute_target.name\n", + "run_config.environment.docker.enabled = True\n", + "run_config.prepare_environment = True\n", + "run_config.batchai.node_count = 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change index to use master packages\n", + "If you want to use master rather than preview run the below code. Once Public preview is launched we would not need this cell."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import CondaDependencies\n", + "\n", + "cd = CondaDependencies()\n", + "\n", + "cd.remove_pip_option(pip_option=\"--index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\")\n", + "cd.set_pip_index_url(index_url=\"--extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/master/588E708E0DF342C4A80BD954289657CF\")\n", + "\n", + "run_config.environment.python.conda_dependencies = cd\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config.environment.docker.base_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config.environment.docker.base_image = 'ninhu/amlbase:test-utf8'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Get Data File\n", + "For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists(project_folder):\n", + " os.makedirs(project_folder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $project_folder/get_data.py\n", + "\n", + "from sklearn import datasets\n", + "from scipy import sparse\n", + "import numpy as np\n", + "\n", + "def get_data():\n", + " \n", + " digits = datasets.load_digits()\n", + " X_digits = digits.data\n", + " y_digits = digits.target\n", + "\n", + " return { \"X\" : X_digits, \"y\" : y_digits }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate AutoML \n", + "\n", + "You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() semantics for local executions too. \n", + "\n", + "Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iterations|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML Classifier trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_settings = {\n", + " \"max_time_sec\": 120,\n", + " \"iterations\": 20,\n", + " \"n_cross_validations\": 5,\n", + " \"primary_metric\": 'AUC_weighted',\n", + " \"preprocess\": False,\n", + " \"concurrent_iterations\": 5,\n", + " \"verbosity\": logging.INFO\n", + "}\n", + "\n", + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " path=project_folder,\n", + " run_configuration = run_config,\n", + " data_script = project_folder + \"./get_data.py\",\n", + " **automl_settings\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config.environment.docker.base_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config.environment.docker.base_image_registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config.environment.docker.enabled = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run = experiment.submit(automl_config, show_output=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the Results\n", + "\n", + "#### Loading executed runs\n", + "In case you need to load a previously executed run given a run id please enable 
the below cell" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "remote_run = AutoMLRun(experiment=experiment, run_id='AutoML_5db13491-c92a-4f1d-b622-8ab8d973a058')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration completes, then you will see an auto-updating graph and table show up. It refreshes once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(remote_run).show() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# wait till the run finishes\n", + "remote_run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(remote_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + "\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Canceling runs\n", + "\n", + "You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cancel the ongoing experiment and stop scheduling new iterations\n", + "remote_run.cancel()\n", + "\n", + "# Cancel iteration 1 and move onto iteration 2\n", + "# remote_run.cancel_iteration(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = remote_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric\n", + "Show the run/model which has the smallest `log_loss` value." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"log_loss\"\n", + "best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration\n", + "Show the run and model from the 3rd iteration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 3\n", + "third_run, third_model = remote_run.get_output(iteration=iteration)\n", + "print(third_run)\n", + "print(third_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "remote_run.register_model(description=description, tags=tags)\n", + "remote_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model \n", + "\n", + "#### Load Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_digits = digits.data[:10, :]\n", + "y_digits = digits.target[:10]\n", + "images = digits.images[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Testing our best pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Randomly select digits and test\n", + "for index in np.random.choice(len(y_digits), 2):\n", + " print(index)\n", + " predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n", + " label = y_digits[index]\n", + " title = \"Label value = 
%d Predicted value = %d \" % ( label,predicted)\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/03c.auto-ml-remote-batchai-compute-target.ipynb b/automl/03c.auto-ml-remote-batchai-compute-target.ipynb new file mode 100644 index 000000000..2823db00d --- /dev/null +++ b/automl/03c.auto-ml-remote-batchai-compute-target.ipynb @@ -0,0 +1,527 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 03: Remote Execution using Batch AI\n", + "\n", + "In this example we use scikit-learn's [digits dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use the AutoML Classifier for a simple classification problem.\n", + "\n", + "Make sure you have executed the [setup](setup.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Creating an Experiment using an existing Workspace\n", + "2. Attaching an existing Batch AI compute to a workspace\n", + "3. Instantiating AutoMLConfig \n", + "4. Training the Model using the Batch AI\n", + "5. Exploring the results\n", + "6. 
Testing the fitted model\n", + "\n", + "In addition this notebook showcases the following features\n", + "- **Parallel** Executions for iterations\n", + "- Asynchronous tracking of progress\n", + "- **Cancelling** individual iterations or the entire run\n", + "- Retrieving models for any iteration or logged metric\n", + "- specify automl settings as **kwargs**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "import pandas as pd\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl.run import AutoMLRun\n", + "import time\n", + "import logging\n", + "from sklearn import datasets\n", + "import seaborn as sns\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import random\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for the run history container in the workspace\n", + "experiment_name = 'automl-remote-batchai'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-remote-batchai'\n", + "\n", + "import os\n", + "from azureml.core.experiment import Experiment\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", 
+ "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data = output, index = ['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Batch AI Cluster\n", + "The cluster is created as Machine Learning Compute and will appear under your workspace.\n", + "\n", + "Note: The cluster creation can take over 10 minutes, be patient." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "# choose a name for your cluster\n", + "batchai_cluster_name = ws.name + \"cpu21\"\n", + "\n", + "found = False\n", + "# see if this compute target already exists in the workspace\n", + "for ct in ws.compute_targets():\n", + " print(ct.name, ct.type)\n", + " if (ct.name == batchai_cluster_name and ct.type == 'BatchAI'):\n", + " found = True\n", + " print('found compute target. just use it.')\n", + " compute_target = ct\n", + " break\n", + " \n", + "if not found:\n", + " print('creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # D2 is 2 cores\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = False,\n", + " cluster_min_nodes = 2, \n", + " cluster_max_nodes = 2)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws,batchai_cluster_name, provisioning_config)\n", + " \n", + " # can poll for a minimum number of nodes and for a specific timeout. 
\n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + " \n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + " print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change index to use master packages\n", + "If you want to use master rather than preview run the below code. Once Public preview is launched we would not need this cell." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Get Data File\n", + "For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "if not os.path.exists(project_folder):\n", + " os.makedirs(project_folder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $project_folder/get_data.py\n", + "\n", + "from sklearn import datasets\n", + "from scipy import sparse\n", + "import numpy as np\n", + "\n", + "def get_data():\n", + " \n", + " digits = datasets.load_digits()\n", + " X_digits = digits.data\n", + " y_digits = digits.target\n", + "\n", + " return { \"X\" : X_digits, \"y\" : y_digits }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate AutoML \n", + "\n", + "You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() semantics for local executions too. 
\n", + "\n", + "Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iterations|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML Classifier trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.automl import AutoMLConfig\n", + "import time\n", + "import logging\n", + "\n", + "automl_settings = {\n", + " \"name\": \"AutoML_Demo_Experiment_{0}\".format(time.time()),\n", + " \"max_time_sec\": 120,\n", + " \"iterations\": 20,\n", + " \"n_cross_validations\": 5,\n", + " \"primary_metric\": 'AUC_weighted',\n", + " \"preprocess\": False,\n", + " \"concurrent_iterations\": 5,\n", + " \"verbosity\": logging.INFO\n", + "}\n", + "\n", + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " path=project_folder,\n", + " compute_target = compute_target,\n", + " data_script = project_folder + \"./get_data.py\",\n", + " **automl_settings\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run = experiment.submit(automl_config, show_output=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the Results\n", + "\n", + "#### Loading executed runs\n", + "In case you need to load a previously executed run given a run id please enable the below cell" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "remote_run = AutoMLRun(experiment=experiment, run_id='AutoML_5db13491-c92a-4f1d-b622-8ab8d973a058')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for monitoring runs\n", + "\n", + "The 
widget will sit on \"loading\" until the first iteration completes, then you will see an auto-updating graph and table show up. It refreshes once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(remote_run).show() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# wait till the run finishes\n", + "remote_run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(remote_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + " \n", + "import pandas as pd\n", + "import seaborn as sns\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Canceling runs\n", + "\n", + "You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cancel the ongoing experiment and stop scheduling new iterations\n", + "remote_run.cancel()\n", + "\n", + "# Cancel iteration 1 and move onto iteration 2\n", + "# remote_run.cancel_iteration(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = remote_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric\n", + "Show the run/model which has the smallest `log_loss` value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"log_loss\"\n", + "best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration\n", + "Show the run and model from the 3rd iteration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 3\n", + "third_run, third_model = remote_run.get_output(iteration=iteration)\n", + "print(third_run)\n", + "print(third_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "remote_run.register_model(description=description, tags=tags)\n", + "remote_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model \n", + "\n", + "#### Load Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import datasets\n", + "\n", + "digits = datasets.load_digits()\n", + "X_digits = digits.data[:10, :]\n", + "y_digits = digits.target[:10]\n", + "images = digits.images[:10]" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Testing our best pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Randomly select digits and test\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import random\n", + "import numpy as np\n", + "\n", + "for index in np.random.choice(len(y_digits), 2):\n", + " print(index)\n", + " predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n", + " label = y_digits[index]\n", + " title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/04.auto-ml-remote-execution-text-data-blob-store.ipynb b/automl/04.auto-ml-remote-execution-text-data-blob-store.ipynb new file mode 100644 index 000000000..df3a5fd20 --- /dev/null +++ b/automl/04.auto-ml-remote-execution-text-data-blob-store.ipynb @@ -0,0 +1,545 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Auto ML : Remote Execution with Text data from Blobstorage\n", + "\n", + "In this example we use the [Burning Man 2016 dataset](https://innovate.burningman.org/datasets-page/) to showcase how you can use the AutoML Classifier to handle text data from an Azure blob storage.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Creating an Experiment using an existing Workspace\n", + "2. Attaching an existing DSVM to a workspace\n", + "3. Instantiating AutoMLConfig \n", + "4. Training the Model using the DSVM\n", + "5. Exploring the results\n", + "6. Testing the fitted model\n", + "\n", + "In addition this notebook showcases the following features\n", + "- **Parallel** Executions for iterations\n", + "- Asynchronous tracking of progress\n", + "- **Cancelling** individual iterations or the entire run\n", + "- Retrieving models for any iteration or logged metric\n", + "- specify automl settings as **kwargs**\n", + "- handling **text** data with **preprocess** flag\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for the run history container in the workspace\n", + "experiment_name = 'automl-remote-dsvm-blobstore-3'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-remote-dsvm-blobstore-3'\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data=output, index=['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## Attach a Remote Linux DSVM\n", + "To use a remote Docker compute target:\n", + "1. Create a Linux DSVM in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS. Make sure that disk space is available under /tmp because AutoML creates files under /tmp/azureml_runs. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4 GB per core.\n", + "2. Enter the IP address, username and password below\n", + "\n", + "**Note**: By default SSH runs on port 22 and you don't need to specify it. But if, for security reasons, you switch to a different port (such as 5022), you can append the port number to the address. [Read more](https://render.githubusercontent.com/documentation/sdk/ssh-issue.md) on this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import RemoteCompute\n", + "\n", + "# Add your VM information below\n", + "dsvm_name = 'mydsvm1'\n", + "dsvm_ip_addr = '<>'\n", + "dsvm_username = '<>'\n", + "dsvm_password = '<>'\n", + "\n", + "dsvm_compute = RemoteCompute.attach(workspace=ws, name=dsvm_name, address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=22)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a RunConfiguration with DSVM name\n", + "Run the below code to tell the runconfiguration the name of your dsvm."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "\n", + "run_config = RunConfiguration()\n", + "run_config.target = dsvm_compute" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change index to use master packages\n", + "If you want to use master rather than preview run the below code. Once Public preview is launched we would not need this cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import CondaDependencies\n", + "\n", + "cd = CondaDependencies()\n", + "\n", + "cd.remove_pip_option(pip_option=\"--index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\")\n", + "cd.set_pip_index_url(index_url=\"--extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/master/588E708E0DF342C4A80BD954289657CF\")\n", + "\n", + "run_config.environment.python.conda_dependencies = cd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Get Data File\n", + "For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n", + "\n", + "The *get_data()* function returns a [dictionary](README.md#getdata)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists(project_folder):\n", + " os.makedirs(project_folder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $project_folder/get_data.py\n", + "\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import LabelEncoder\n", + "\n", + "def get_data():\n", + " # Burning man 2016 data\n", + " df = pd.read_csv(\"https://automldemods.blob.core.windows.net/datasets/PlayaEvents2016,_1.6MB,_3.4k-rows.cleaned.2.tsv\",\n", + " delimiter=\"\\t\", quotechar='\"')\n", + " # get integer labels\n", + " le = LabelEncoder()\n", + " le.fit(df[\"Label\"].values)\n", + " y = le.transform(df[\"Label\"].values)\n", + " df = df.drop([\"Label\"], axis=1)\n", + "\n", + " df_train, _, y_train, _ = train_test_split(df, y, test_size=0.1, random_state=42)\n", + "\n", + " return { \"X\" : df, \"y\" : y }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### View data\n", + "\n", + "You can execute the *get_data()* function locally to view the *train* data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%run $project_folder/get_data.py\n", + "data_dict = get_data()\n", + "df = data_dict[\"X\"]\n", + "y = data_dict[\"y\"]\n", + "pd.set_option('display.max_colwidth', 15)\n", + "df['Label'] = pd.Series(y, index=df.index)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate AutoML \n", + "\n", + "You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() symantic for local excutions too. 
\n", + "\n", + "Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iteration|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML Classifier trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM|\n", + "|**preprocess**| *True/False*
Setting this to *True* enables Auto ML Classifier to perform preprocessing
on the input to handle *missing data*, and perform some common *feature extraction*|\n", + "|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.
Default is *1*, you can set it to *-1* to use all cores|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_settings = {\n", + " \"max_time_sec\": 12000,\n", + " \"iterations\": 10,\n", + " \"n_cross_validations\": 5,\n", + " \"primary_metric\": 'AUC_weighted',\n", + " \"preprocess\": True,\n", + " \"max_cores_per_iteration\": 2\n", + "}\n", + "\n", + "automl_config = AutoMLConfig(task = 'classification',\n", + " path=project_folder,\n", + " run_configuration = run_config,\n", + " data_script = project_folder + \"./get_data.py\",\n", + " **automl_settings\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model \n", + "\n", + "You can call the *fit* method on the AutoML instance and pass the dsvm runconfig name. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets/models even when the experiment is running to retreive the best model up to that point. Once you are satisfied with the model you can cancel a particular iteration or the whole run.\n", + "\n", + "\n", + "*fit* method on Auto ML Classifier triggers the training of the model. It can be called with the following parameters\n", + "\n", + "**Note**: You cannot pass Numpy arrays directly to the fit method in case of remote executions.\n", + "\n", + "|**Parameter**|**Description**|\n", + "|-|-|\n", + "|**compute_target**|Indicates the compute used for training. local indicates train on the same compute which hosts the jupyter notebook.
For DSVM and Batch AI please refer to the relevant notebooks.|\n", + "|**show_output**| True/False to turn on/off console output|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run = experiment.submit(automl_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the Results \n", + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration completes; then you will see an auto-updating graph and table show up. It refreshes once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(remote_run).show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log.
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(remote_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + "\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Canceling runs\n", + "You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cancel the ongoing experiment and stop scheduling new iterations\n", + "remote_run.cancel()\n", + "\n", + "# Cancel iteration 1 and move onto iteration 2\n", + "# remote_run.cancel_iteration(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = remote_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# lookup_metric = \"accuracy\"\n", + "# best_run, fitted_model = remote_run.get_output(metric=lookup_metric)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 0\n", + "best_run, fitted_model = remote_run.get_output(iteration=iteration)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "remote_run.register_model(description=description, tags=tags)\n", + "remote_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sklearn\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from pandas_ml import ConfusionMatrix\n", + "\n", + "df = pd.read_csv(\"https://automldemods.blob.core.windows.net/datasets/PlayaEvents2016,_1.6MB,_3.4k-rows.cleaned.2.tsv\",\n", + " delimiter=\"\\t\", quotechar='\"')\n", + "\n", + "# get integer labels\n", + "le = LabelEncoder()\n", + "le.fit(df[\"Label\"].values)\n", + "y 
= le.transform(df[\"Label\"].values)\n", + "df = df.drop([\"Label\"], axis=1)\n", + "\n", + "_, df_test, _, y_test = train_test_split(df, y, test_size=0.1, random_state=42)\n", + "\n", + "\n", + "ypred = fitted_model.predict(df_test.values)\n", + "\n", + "\n", + "ypred_strings = le.inverse_transform(ypred)\n", + "ytest_strings = le.inverse_transform(y_test)\n", + "\n", + "cm = ConfusionMatrix(ytest_strings, ypred_strings)\n", + "\n", + "print(cm)\n", + "\n", + "cm.plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/05.auto-ml-missing-data-Blacklist-Early-Termination.ipynb b/automl/05.auto-ml-missing-data-Blacklist-Early-Termination.ipynb new file mode 100644 index 000000000..62c9f2e4a --- /dev/null +++ b/automl/05.auto-ml-missing-data-Blacklist-Early-Termination.ipynb @@ -0,0 +1,399 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 05 : Blacklisting models, Early termination and handling missing data\n", + "\n", + "In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use the AutoML Classifier for handling missing values in data. We also provide a stopping metrics indicating a target for the primary metrics so that AutoML can terminate the run without necessarly going through all the iterations. 
Finally, if you want to avoid a certain pipeline, we allow you to specify a blacklist of algorithms that AutoML will ignore for this run.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Creating an Experiment using an existing Workspace\n", + "2. Instantiating AutoMLConfig\n", + "3. Training the Model\n", + "4. Exploring the results\n", + "5. Testing the fitted model\n", + "\n", + "In addition this notebook showcases the following features\n", + "- **Blacklist** certain pipelines\n", + "- Specify a **target metric** to indicate stopping criteria\n", + "- Handling **Missing Data** in the input\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for the experiment\n", + "experiment_name = 'automl-local-missing-data'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-local-missing-data'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data=output, index=['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating 
Missing Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import sparse\n", + "\n", + "digits = datasets.load_digits()\n", + "X_digits = digits.data[10:,:]\n", + "y_digits = digits.target[10:]\n", + "\n", + "# Add missing values in 75% of the lines\n", + "missing_rate = 0.75\n", + "n_missing_samples = int(np.floor(X_digits.shape[0] * missing_rate))\n", + "missing_samples = np.hstack((np.zeros(X_digits.shape[0] - n_missing_samples, dtype=np.bool), np.ones(n_missing_samples, dtype=np.bool)))\n", + "rng = np.random.RandomState(0)\n", + "rng.shuffle(missing_samples)\n", + "missing_features = rng.randint(0, X_digits.shape[1], n_missing_samples)\n", + "X_digits[np.where(missing_samples)[0], missing_features] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data=X_digits)\n", + "df['Label'] = pd.Series(y_digits, index=df.index)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate Auto ML Config\n", + "\n", + "\n", + "Instantiate a AutoML Object This creates an Experiment in Azure ML. You can reuse this objects to trigger multiple runs. Each run will be part of the same experiment.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**task**|classification or regression|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iteration|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML Classifier trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**preprocess**| *True/False*
Setting this to *True* enables Auto ML Classifier to perform preprocessing
on the input to handle *missing data*, and perform some common *feature extraction*|\n", + "|**exit_score**|*double* value indicating the target for *primary_metric*.
Once the target is surpassed the run terminates|\n", + "|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.

Allowed values for **Classification**
LogisticRegression
SGDClassifierWrapper
NBWrapper
BernoulliNB
SVCWrapper
LinearSVMWrapper
KNeighborsClassifier
DecisionTreeClassifier
RandomForestClassifier
ExtraTreesClassifier
LightGBMClassifier

Allowed values for **Regression**
ElasticNet
GradientBoostingRegressor
DecisionTreeRegressor
KNeighborsRegressor
LassoLars
SGDRegressor
RandomForestRegressor
ExtraTreesRegressor|\n", + "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", + "|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]
Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n", + "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " primary_metric = 'AUC_weighted',\n", + " max_time_sec = 12000,\n", + " iterations = 20,\n", + " n_cross_validations = 5,\n", + " preprocess = True,\n", + " exit_score = 0.994,\n", + " blacklist_algos = ['KNeighborsClassifier','LinearSVMWrapper'],\n", + " verbosity = logging.INFO,\n", + " X = X_digits, \n", + " y = y_digits,\n", + " path=project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model\n", + "\n", + "You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n", + "You will see the currently running iterations printing to the console." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "NOTE: The widget will display a link at the bottom. 
This will not currently work, but will eventually link to a web-ui to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(local_run).show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(local_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + "\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. Each pipeline is a tuple of three elements. The first element is the score for the pipeline the second element is the string description of the pipeline and the last element are the pipeline objects used for each fold in the cross-validation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = local_run.get_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# lookup_metric = \"accuracy\"\n", + "# best_run, fitted_model = local_run.get_output(metric=lookup_metric)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# iteration = 3\n", + "# best_run, fitted_model = local_run.get_output(iteration=iteration)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "local_run.register_model(description=description, tags=tags)\n", + "local_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_digits = digits.data[:10, :]\n", + "y_digits = digits.target[:10]\n", + "images = digits.images[:10]\n", + "\n", + "#Randomly select digits and test\n", + "for index in np.random.choice(len(y_digits), 2):\n", + " print(index)\n", + " predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n", + " label = y_digits[index]\n", + " title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = 
fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n", + " plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/06.auto-ml-sparse-data-custom-cv-split.ipynb b/automl/06.auto-ml-sparse-data-custom-cv-split.ipynb new file mode 100644 index 000000000..e1b864482 --- /dev/null +++ b/automl/06.auto-ml-sparse-data-custom-cv-split.ipynb @@ -0,0 +1,424 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 06: Custom CV splits, handling sparse data\n", + "\n", + "In this example we use the scikit learn's [20newsgroup](In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use the AutoML Classifier for handling sparse data and specify custom cross validations splits.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Creating an Experiment using an existing Workspace\n", + "2. Instantiating AutoMLConfig\n", + "4. Training the Model\n", + "5. Exploring the results\n", + "6. 
Testing the fitted model\n", + "\n", + "In addition this notebook showcases the following features\n", + "- **Custom CV** splits \n", + "- Handling **Sparse Data** in the input" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for the experiment\n", + "experiment_name = 'automl-local-missing-data'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-local-missing-data'\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data=output, index=['']).T" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Sparse Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import fetch_20newsgroups\n", + "from sklearn.feature_extraction.text import HashingVectorizer\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "remove = ('headers', 'footers', 'quotes')\n", + "categories = [\n", + " 'alt.atheism',\n", + " 'talk.religion.misc',\n", + " 'comp.graphics',\n", + " 'sci.space',\n", + "]\n", + "data_train = fetch_20newsgroups(subset='train', categories=categories,\n", + " shuffle=True, random_state=42,\n", + " remove=remove)\n", + "\n", + "X_train, X_validation, y_train, y_validation = train_test_split(data_train.data, data_train.target, test_size=0.33, random_state=42)\n", + "\n", + "\n", + "vectorizer = HashingVectorizer(stop_words='english', alternate_sign=False,\n", + " n_features=2**16)\n", + "X_train = vectorizer.transform(X_train)\n", + "X_validation = vectorizer.transform(X_validation)\n", + "\n", + "summary_df = pd.DataFrame(index = ['No of Samples', 'No of Features'])\n", + "summary_df['Train Set'] = [X_train.shape[0], X_train.shape[1]]\n", + "summary_df['Validation Set'] = [X_validation.shape[0], X_validation.shape[1]]\n", + "summary_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate Auto ML Config\n", + "\n", + "\n", + "Instantiate a AutoML Object This creates an Experiment in Azure ML. 
You can reuse this objects to trigger multiple runs. Each run will be part of the same experiment.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**task**|classification or regression|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iteration|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML Classifier trains the data with a specific pipeline|\n", + "|**preprocess**| *True/False*
Setting this to *True* enables Auto ML Classifier to perform preprocessing
on the input to handle *missing data*, and perform some common *feature extraction*
*Note: If input data is Sparse you cannot use preprocess=True*|\n", + "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", + "|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]
Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n", + "|**X_valid**|(sparse) array-like, shape = [n_samples, n_features] for the custom Validation set|\n", + "|**y_valid**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]
Multi-class targets. An indicator matrix turns on multilabel classification. for the custom Validation set|\n", + "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log='automl_errors.log',\n", + " primary_metric='AUC_weighted',\n", + " max_time_sec=12000,\n", + " iterations=5,\n", + " preprocess=False,\n", + " verbosity=logging.INFO,\n", + " X = X_train, \n", + " y = y_train,\n", + " X_valid = X_validation, \n", + " y_valid = y_validation, \n", + " path=project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model\n", + "\n", + "You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n", + "You will see the currently running iterations printing to the console." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(local_run).show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(local_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + " \n", + "import pandas as pd\n", + "import seaborn as sns\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = local_run.get_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# lookup_metric = \"accuracy\"\n", + "# best_run, fitted_model = local_run.get_output(metric=lookup_metric)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# iteration = 3\n", + "# best_run, fitted_model = local_run.get_output(iteration=iteration)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "local_run.register_model(description=description, tags=tags)\n", + "local_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()### Testing the Fitted Model\n", + "\n", + "#### Load Test Data\n", + "import sklearn\n", + "from pandas_ml import ConfusionMatrix\n", + "\n", + "remove = ('headers', 'footers', 'quotes')\n", + "categories = [\n", + " 'alt.atheism',\n", + " 'talk.religion.misc',\n", + " 'comp.graphics',\n", + " 'sci.space',\n", + "]\n", + "\n", + "\n", + "data_test = fetch_20newsgroups(subset='test', categories=categories,\n", + " shuffle=True, random_state=42,\n", + " remove=remove)\n", + "\n", 
+ "vectorizer = HashingVectorizer(stop_words='english', alternate_sign=False,\n", + " n_features=2**16)\n", + "\n", + "X_test = vectorizer.transform(data_test.data)\n", + "y_test = data_test.target\n", + "\n", + "#### Testing our best pipeline\n", + "\n", + "ypred = fitted_model.predict(X_test)\n", + "ypred_strings = [categories[i] for i in ypred]\n", + "ytest_strings = [categories[i] for i in y_test]\n", + "\n", + "cm = ConfusionMatrix(ytest_strings, ypred_strings)\n", + "print(cm)\n", + "cm.plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/07.auto-ml-exploring-previous-runs.ipynb b/automl/07.auto-ml-exploring-previous-runs.ipynb new file mode 100644 index 000000000..ee6a5bb15 --- /dev/null +++ b/automl/07.auto-ml-exploring-previous-runs.ipynb @@ -0,0 +1,329 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 07: Exploring previous runs\n", + "\n", + "In this example we present some examples on navigating previously executed runs. We also show how you can download a fitted model for any previous run.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. List all Experiments for the workspace\n", + "2. List all AutoML Runs for a given project\n", + "3. Get details for a AutoML Run. 
(Automl settings, run widget & all metrics)\n", + "4. Downlaod fitted pipeline for any iteration\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# List all AutoML Experiments in a Workspace" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "import re\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.run import Run\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "experiment_list = Experiment.list(workspace=ws)\n", + "\n", + "summary_df = pd.DataFrame(index = ['No of Runs'])\n", + "pattern = re.compile('^AutoML_[^_]*$')\n", + "for experiment in experiment_list:\n", + " all_runs = list(experiment.get_runs())\n", + " automl_runs = []\n", + " for run in all_runs:\n", + " if(pattern.match(run.id)):\n", + " automl_runs.append(run) \n", + " summary_df[experiment.name] = [len(automl_runs)]\n", + " \n", + "pd.set_option('display.max_colwidth', -1)\n", + "summary_df.T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "# List AutoML runs for an Experiment\n", + "You can set Experiment name with any experiment name from the result of the previous cell to load the AutoML runs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'automl-local-classification' # Replace this with any project name from previous cell\n", + "\n", + "proj = ws.experiments()[experiment_name]\n", + "summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name'])\n", + "pattern = re.compile('^AutoML_[^_]*$')\n", + "all_runs = list(proj.get_runs(properties={'azureml.runsource': 'automl'}))\n", + "for run in all_runs:\n", + " if(pattern.match(run.id)):\n", + " properties = run.get_properties()\n", + " tags = run.get_tags()\n", + " amlsettings = eval(properties['RawAMLSettingsString'])\n", + " if 'iterations' in tags:\n", + " iterations = tags['iterations']\n", + " else:\n", + " iterations = properties['num_iterations']\n", + " summary_df[run.id] = [amlsettings['task_type'], run.get_details()['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name']]\n", + " \n", + "from IPython.display import HTML\n", + "projname_html = HTML(\"

{}

\".format(proj.name))\n", + "\n", + "from IPython.display import display\n", + "display(projname_html)\n", + "display(summary_df.T)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get Details for a Auto ML Run\n", + "\n", + "Copy the project name and run id from the previous cell output to find more details on a particular run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_id = 'AutoML_b7c4076b-181d-4ef4-ab9f-36bb44c1e36c'#<> # Replace with a Auto ML Run ID like 'AutoML_a5a90841-8915-4fba-91a4-bbbfc07dd132'\n", + "\n", + "from azureml.train.widgets import RunDetails\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "ml_run = AutoMLRun(experiment=experiment, run_id=run_id)\n", + "\n", + "summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name', 'Start Time', 'End Time'])\n", + "properties = ml_run.get_properties()\n", + "tags = ml_run.get_tags()\n", + "status = ml_run.get_details()\n", + "amlsettings = eval(properties['RawAMLSettingsString'])\n", + "if 'iterations' in tags:\n", + " iterations = tags['iterations']\n", + "else:\n", + " iterations = properties['num_iterations']\n", + "start_time = None\n", + "if 'startTimeUtc' in status:\n", + " start_time = status['startTimeUtc']\n", + "end_time = None\n", + "if 'endTimeUtc' in status:\n", + " end_time = status['endTimeUtc']\n", + "summary_df[ml_run.id] = [amlsettings['task_type'], status['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name'], start_time, end_time]\n", + "display(HTML('

Runtime Details

'))\n", + "display(summary_df)\n", + "\n", + "#settings_df = pd.DataFrame(data=amlsettings, index=[''])\n", + "display(HTML('

AutoML Settings

'))\n", + "display(amlsettings)\n", + "\n", + "display(HTML('

Iterations

'))\n", + "RunDetails(ml_run).show() \n", + "\n", + "children = list(ml_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + "\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap=True)\n", + "s = rundata.style.background_gradient(cmap=cm)\n", + "display(HTML('

Metrics

'))\n", + "display(s)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download fitted models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download best model for any given metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metric = 'AUC_weighted' # Replace with a metric name\n", + "best_run, fitted_model = ml_run.get_output(metric=metric)\n", + "fitted_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download model for any given iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 4 # Replace with a interation number\n", + "best_run, fitted_model = ml_run.get_output(iteration=iteration)\n", + "fitted_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "ml_run.register_model(description=description, tags=tags)\n", + "ml_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register best model for any given metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "metric = 'AUC_weighted' # Replace with a metric name\n", + "description = 'AutoML Model'\n", + "tags = None\n", + "ml_run.register_model(description=description, tags=tags, metric=metric)\n", + "ml_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register model for any given iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "iteration = 4 # Replace with a interation number\n", + "description = 'AutoML Model'\n", + "tags = None\n", + "ml_run.register_model(description=description, tags=tags, iteration=iteration)\n", + "ml_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb b/automl/08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb new file mode 100644 index 000000000..7d00aba48 --- /dev/null +++ b/automl/08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb @@ -0,0 +1,541 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 08: Remote Execution with Text file\n", + "\n", + "In this sample accesses a data file on a remote DSVM. This is more efficient than reading the file from Blob storage in the get_data method.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Configuring the DSVM to allow files to be access directly by the get_data method.\n", + "2. 
get_data returning data from a local file.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for experiment\n", + "experiment_name = 'automl-remote-dsvm-file'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-remote-dsvm-file'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data=output, index=['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, 
and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Remote Linux DSVM\n", + "Note: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n", + "\n", + "**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you can switch to a different port (such as 5022), you can append the port number to the address. [Read more](https://render.githubusercontent.com/documentation/sdk/ssh-issue.md) on this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cws.compute_targets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import DsvmCompute\n", + "\n", + "dsvm_name = 'mydsvm'\n", + "try:\n", + " dsvm_compute = DsvmCompute(ws, dsvm_name)\n", + " print('found existing dsvm.')\n", + "except:\n", + " print('creating new dsvm.')\n", + " dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n", + " dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n", + " dsvm_compute.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a RunConfiguration with DSVM name\n", + "Run the below code to tell the runconfiguration the name of your dsvm." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "\n", + "run_config = RunConfiguration()\n", + "run_config.target = dsvm_compute" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Change index to use master packages\n", + "If you want to use master rather than preview run the below code. Once Public preview is launched we would not need this cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import CondaDependencies\n", + "\n", + "cd = CondaDependencies()\n", + "\n", + "cd.remove_pip_option(pip_option=\"--index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\")\n", + "cd.set_pip_index_url(index_url=\"--extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/master/588E708E0DF342C4A80BD954289657CF\")\n", + "\n", + "run_config.environment.python.conda_dependencies = cd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Copy data file to the DSVM\n", + "Download the data file.\n", + "Copy the data file to the DSVM under the folder:\n", + "\t\t~/.azureml/share/{workspacename}/{projectname}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"https://automldemods.blob.core.windows.net/datasets/PlayaEvents2016,_1.6MB,_3.4k-rows.cleaned.2.tsv\",\n", + " delimiter=\"\\t\", quotechar='\"')\n", + "df.to_csv(\"data.tsv\", sep=\"\\t\", quotechar='\"', index=False)\n", + "\n", + "# Now copy the file data.tsv to the folder /tmp/data on the DSVM" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Get Data File\n", + "For remote executions you should author a get_data.py file containing a get_data() function. 
This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n", + "\n", + "The *get_data()* function returns a [dictionary](README.md#getdata)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists(project_folder):\n", + " os.makedirs(project_folder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $project_folder/get_data.py\n", + "\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import LabelEncoder\n", + "import os\n", + "\n", + "def get_data():\n", + " # Burning man 2016 data\n", + " df = pd.read_csv('/tmp/data/data.tsv',\n", + " delimiter=\"\\t\", quotechar='\"')\n", + " # get integer labels\n", + " le = LabelEncoder()\n", + " le.fit(df[\"Label\"].values)\n", + " y = le.transform(df[\"Label\"].values)\n", + " df = df.drop([\"Label\"], axis=1)\n", + "\n", + " df_train, _, y_train, _ = train_test_split(df, y, test_size=0.1, random_state=42)\n", + "\n", + " return { \"X\" : df.values, \"y\" : y }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate AutoML \n", + "\n", + "You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() symantic for local excutions too. \n", + "\n", + "Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iteration|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML Classifier trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM|\n", + "|**preprocess**| *True/False*
Setting this to *True* enables Auto ML Classifier to perform preprocessing
on the input to handle *missing data*, and perform some common *feature extraction*|\n", + "|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.
Default is *1*, you can set it to *-1* to use all cores|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_settings = {\n", + " \"name\": \"AutoML_Demo_Experiment_{0}\".format(time.time()),\n", + " \"max_time_sec\": 12000,\n", + " \"iterations\": 10,\n", + " \"n_cross_validations\": 5,\n", + " \"primary_metric\": 'AUC_weighted',\n", + " \"preprocess\": True,\n", + " \"max_cores_per_iteration\": 2,\n", + " \"verbosity\": logging.INFO\n", + "}\n", + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " path=project_folder,\n", + " run_configuration = run_config,\n", + " data_script = project_folder + \"./get_data.py\",\n", + " **automl_settings\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model \n", + "\n", + "You can call the *fit* method on the AutoML instance and pass the dsvm runconfig name. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets/models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model you can cancel a particular iteration or the whole run.\n", + "\n", + "\n", + "*fit* method on Auto ML Classifier triggers the training of the model. It can be called with the following parameters\n", + "\n", + "**Note**: You cannot pass Numpy arrays directly to the fit method in case of remote executions.\n", + "\n", + "|**Parameter**|**Description**|\n", + "|-|-|\n", + "|**compute_target**|Indicates the compute used for training. local indicates train on the same compute which hosts the jupyter notebook.
For DSVM and Batch AI please refer to the relevant notebooks.|\n", + "|**show_output**| True/False to turn on/off console output|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run = experiment.submit(automl_config, show_output=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the Results \n", + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration has completed, then you will see an auto-updating graph and table show up. It refreshes once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(remote_run).show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log.
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(remote_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + "\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "cm = sns.light_palette(\"lightgreen\", as_cmap = True)\n", + "s = rundata.style.background_gradient(cmap = cm)\n", + "s\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Canceling runs\n", + "You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cancel the ongoing experiment and stop scheduling new iterations\n", + "# remote_run.cancel()\n", + "\n", + "# Cancel iteration 1 and move onto iteration 2\n", + "# remote_run.cancel_iteration(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = remote_run.get_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# lookup_metric = \"accuracy\"\n", + "# best_run, fitted_model = remote_run.get_output(metric=lookup_metric)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# iteration = 1\n", + "# best_run, fitted_model = remote_run.get_output(iteration=iteration)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "remote_run.register_model(description=description, tags=tags)\n", + "remote_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sklearn\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from pandas_ml import ConfusionMatrix\n", + "\n", + "df = pd.read_csv(\"https://automldemods.blob.core.windows.net/datasets/PlayaEvents2016,_1.6MB,_3.4k-rows.cleaned.2.tsv\",\n", + " delimiter=\"\\t\", quotechar='\"')\n", + "\n", + "# get integer labels\n", + "le = LabelEncoder()\n", + "le.fit(df[\"Label\"].values)\n", + "y = le.transform(df[\"Label\"].values)\n", + 
"df = df.drop([\"Label\"], axis=1)\n", + "\n", + "_, df_test, _, y_test = train_test_split(df, y, test_size=0.1, random_state=42)\n", + "\n", + "ypred = fitted_model.predict(df_test.values)\n", + "\n", + "ypred_strings = le.inverse_transform(ypred)\n", + "ytest_strings = le.inverse_transform(y_test)\n", + "\n", + "cm = ConfusionMatrix(ytest_strings, ypred_strings)\n", + "\n", + "print(cm)\n", + "\n", + "cm.plot()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/09.auto-ml-classification-with-deployment.ipynb b/automl/09.auto-ml-classification-with-deployment.ipynb new file mode 100644 index 000000000..a80233479 --- /dev/null +++ b/automl/09.auto-ml-classification-with-deployment.ipynb @@ -0,0 +1,480 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 09: Classification with deployment\n", + "\n", + "In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use the AutoML Classifier for a simple classification problem.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Creating an Experiment using an existing Workspace\n", + "2. Instantiating AutoMLConfig\n", + "3. Training the Model using local compute\n", + "4. 
Exploring the results\n", + "5. Registering the model\n", + "6. Creating Image and creating aci service\n", + "7. Testing the aci service\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for experiment\n", + "experiment_name = 'automl-local-classification'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-local-classification'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data=output, index=['']).T" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate Auto ML Config\n", + "\n", + "Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**task**|classification or regression|\n", + "|**primary_metric**|This is the metric that you want to optimize.
Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted|\n", + "|**max_time_sec**|Time limit in seconds for each iteration|\n", + "|**iterations**|Number of iterations. In each iteration Auto ML trains the data with a specific pipeline|\n", + "|**n_cross_validations**|Number of cross validation splits|\n", + "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", + "|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]
Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n", + "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_digits = digits.data[10:,:]\n", + "y_digits = digits.target[10:]\n", + "\n", + "automl_config = AutoMLConfig(task = 'classification',\n", + " name=experiment_name,\n", + " debug_log='automl_errors.log',\n", + " primary_metric='AUC_weighted',\n", + " max_time_sec=1200,\n", + " iterations=10,\n", + " n_cross_validations=2,\n", + " verbosity=logging.INFO,\n", + " X = X_digits, \n", + " y = y_digits,\n", + " path=project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model\n", + "\n", + "You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for a while.\n", + "You will see the currently running iterations printing to the console." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = local_run.get_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register fitted model for deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "description = 'AutoML Model'\n", + "tags = None\n", + "model = local_run.register_model(description=description, tags=tags, iteration=8)\n", + "local_run.model_id # Use this id to deploy the model as a web service in Azure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Scoring script ###" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile score.py\n", + "import pickle\n", + "import json\n", + "import numpy\n", + "from sklearn.externals import joblib\n", + "from azureml.core.model import Model\n", + "\n", + "\n", + "def init():\n", + " global model\n", + " model_path = Model.get_model_path(model_name = 'AutoML3d05a78138') # this name is model.id of model that we want to deploy\n", + " # deserialize the model file back into a sklearn model\n", + " model = joblib.load(model_path)\n", + "\n", + "def run(rawdata):\n", + " try:\n", + " data = json.loads(rawdata)['data']\n", + " data = numpy.array(data)\n", + " result = model.predict(data)\n", + " except Exception as e:\n", + " result = str(e)\n", + " return json.dumps({\"error\": result})\n", + " return json.dumps({\"result\":result.tolist()})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create yml file for env" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To ensure that the fit results are consistent with the training results, the SDK dependency versions need to be the same as in the environment that trained the model.
Details about retrieving the versions can be found in notebook 12.auto-ml-retrieve-the-training-sdk-versions.ipynb." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'automl-local-classification'\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "ml_run = AutoMLRun(experiment=experiment, run_id=local_run.id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dependencies = ml_run.get_run_sdk_dependencies(iteration=7)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n", + " print('{}\\t{}'.format(p, dependencies[p]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then copy the version " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile myenv.yml\n", + "name: myenv\n", + "channels:\n", + " - defaults\n", + "dependencies:\n", + " - pip:\n", + " - numpy==1.14.2\n", + " - scikit-learn==0.19.2\n", + " - --index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\n", + " - --extra-index-url https://pypi.python.org/simple\n", + " - azureml-requirements\n", + " - azureml-train-automl==0.1.13\n", + " - azureml-sdk==0.1.13\n", + " - azureml-core==0.1.13" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Image ###" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.image import ContainerImage\n", + "\n", + "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", + " runtime = \"python\",\n", + " conda_file = \"myenv.yml\",\n", + " description = \"Image for automl classification 
sample\",\n", + " tags = [\"AutoML\",\"classification\", \"version_1\"]\n", + " )\n", + "\n", + "image = ContainerImage.create(name = \"automlsampleimage2\",\n", + " # this is the model object\n", + " models = [model],\n", + " image_config = image_config,\n", + " workspace = ws)\n", + "\n", + "image.wait_for_creation(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy Image as web service on Azure Container Instance ###" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.webservice import AciWebservice\n", + "\n", + "aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n", + " memory_gb = 1, \n", + " tags = ['automl','classification'], \n", + " description = 'sample service for Automl Classification')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.webservice import Webservice\n", + "\n", + "aci_service_name = 'automl-sample-3'\n", + "print(aci_service_name)\n", + "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", + " image = image,\n", + " name = aci_service_name,\n", + " workspace = ws)\n", + "aci_service.wait_for_deployment(True)\n", + "print(aci_service.state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### To delete a service ##" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#aci_service.delete()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### To get logs from deployed service ###" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#aci_service.get_logs()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test Web Service ###" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "#Randomly select digits and test\n", + "digits = datasets.load_digits()\n", + "X_digits = digits.data[:10, :]\n", + "y_digits = digits.target[:10]\n", + "images = digits.images[:10]\n", + "\n", + "for index in np.random.choice(len(y_digits), 3):\n", + " print(index)\n", + " test_sample = json.dumps({'data':X_digits[index:index + 1].tolist()})\n", + " predicted = aci_service.run(input_data = test_sample)\n", + " label = y_digits[index]\n", + " predictedDict = json.loads(predicted)\n", + " title = \"Label value = %d Predicted value = %s \" % ( label,predictedDict['result'][0])\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/10.auto-ml-multi-output-example.ipynb b/automl/10.auto-ml-multi-output-example.ipynb new file mode 100644 index 000000000..6e050ee21 --- /dev/null +++ b/automl/10.auto-ml-multi-output-example.ipynb @@ -0,0 +1,286 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 10: Multi output Example for AutoML" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook shows an example to use AutoML to train the multi output problems by leveraging the correlation between the outputs using indicator vectors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Transformer functions\n", + "The transformation of the input are happening for input X and Y as following, e.g. 
Y = {y_1, y_2}, then X becomes\n", + " \n", + "X 1 0\n", + " \n", + "X 0 1\n", + "\n", + "and Y becomes,\n", + "\n", + "y_1\n", + "\n", + "y_2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy import sparse\n", + "from scipy import linalg\n", + "\n", + "#Transformer functions\n", + "def multi_output_transform_x_y(X, Y):\n", + " X_new = multi_output_transformer_x(X, Y.shape[1])\n", + " y_new = multi_output_transform_y(Y)\n", + " return X_new, y_new\n", + "\n", + "def multi_output_transformer_x(X, number_of_columns_Y):\n", + " indicator_vecs = linalg.block_diag(*([np.ones((X.shape[0], 1))] * number_of_columns_Y))\n", + " if sparse.issparse(X):\n", + " X_new = sparse.vstack(np.tile(X, number_of_columns_Y))\n", + " indicator_vecs = sparse.coo_matrix(indicator_vecs)\n", + " X_new = sparse.hstack((X_new, indicator_vecs))\n", + " else:\n", + " X_new = np.tile(X, (number_of_columns_Y, 1))\n", + " X_new = np.hstack((X_new, indicator_vecs))\n", + " return X_new\n", + "\n", + "def multi_output_transform_y(Y):\n", + " return Y.reshape(-1, order=\"F\")\n", + " \n", + "def multi_output_inverse_transform_y(y, number_of_columns_y):\n", + " return y.reshape((-1, number_of_columns_y), order=\"F\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## AutoML experiment set up" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for experiment\n", + "experiment_name = 'automl-local-classification'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-local-classification'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace'] = ws.name\n", + "output['Resource Group'] = 
ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data=output, index=['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a random dataset for the test purpose " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rng = np.random.RandomState(1)\n", + "X_train = np.sort(200 * rng.rand(600, 1) - 100, axis=0)\n", + "Y_train = np.array([np.pi * np.sin(X_train).ravel(), np.pi * np.cos(X_train).ravel()]).T\n", + "Y_train += (0.5 - rng.rand(*Y_train.shape))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perform X and Y transformation using transformer function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_transformed, y_train_transformed = multi_output_transform_x_y(X_train, Y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'regression',\n", + " debug_log='automl_errors_multi.log',\n", + " primary_metric='r2_score',\n", + " iterations=10,\n", + " n_cross_validations=2,\n", + " verbosity=logging.INFO,\n", + " X=X_train_transformed,\n", + " y=y_train_transformed,\n", + " path=project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fit the transformed data " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the best fit model\n", + "best_run, fitted_model = local_run.get_output()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate random data set for predicting\n", + "X_predict = np.sort(200 * rng.rand(200, 1) - 100, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Transform predict data\n", + "X_predict_transformed = multi_output_transformer_x(X_predict, Y_train.shape[1])\n", + "# Predict and inverse transform the prediction\n", + "y_predict = fitted_model.predict(X_predict_transformed)\n", + "Y_predict = multi_output_inverse_transform_y(y_predict, Y_train.shape[1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(Y_predict)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/11.auto-ml-sample-weight.ipynb b/automl/11.auto-ml-sample-weight.ipynb new file mode 100644 index 000000000..1206a049a --- /dev/null +++ b/automl/11.auto-ml-sample-weight.ipynb @@ -0,0 +1,262 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 11: Sample weight\n", + "\n", + "In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use sample weight with the AutoML Classifier.\n", + "Sample weight is used where some sample values are more important than others.\n", + "\n", + "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. How to specify sample_weight\n", + "2. The difference that it makes to test results\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for experiment\n", + "experiment_name = 'non_sample_weight_experiment'\n", + "sample_weight_experiment_name = 'sample_weight_experiment'\n", + "\n", + "# project folder\n", + "project_folder = 
'./sample_projects/automl-local-classification'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "sample_weight_experiment=Experiment(ws, sample_weight_experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data = output, index = ['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate Auto ML Config\n", + "\n", + "Instantiate two AutoMLConfig Objects. One will be used with sample_weight and one without." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_digits = digits.data[100:,:]\n", + "y_digits = digits.target[100:]\n", + "\n", + "# The example makes the sample weight 0.9 for the digit 4 and 0.01 for all other digits.\n", + "# This makes the model more likely to classify as 4 if the image is not clear.\n", + "sample_weight = np.array([(0.9 if x == 4 else 0.01) for x in y_digits])\n", + "\n", + "automl_classifier = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " primary_metric = 'AUC_weighted',\n", + " max_time_sec = 12000,\n", + " iterations = 10,\n", + " n_cross_validations = 2,\n", + " verbosity = logging.INFO,\n", + " X = X_digits, \n", + " y = y_digits,\n", + " path=project_folder)\n", + "\n", + "automl_sample_weight = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " primary_metric = 'AUC_weighted',\n", + " max_time_sec = 12000,\n", + " iterations = 10,\n", + " n_cross_validations = 2,\n", + " verbosity = logging.INFO,\n", + " X = X_digits, \n", + " y = y_digits,\n", + " sample_weight = sample_weight,\n", + " path=project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Models\n", + "\n", + "You can call the fit method on the AutoML instance and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for a while.\n", + "You will see the currently running iterations printing to the console.\n", + "\n", + "*fit* method on Auto ML Classifier triggers the training of the model. It can be called with the following parameters\n", + "\n", + "|**Parameter**|**Description**|\n", + "|-|-|\n", + "|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n", + "|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]
Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n", + "|**sample_weight**|(sparse) array-like, shape must be the same as **y**.
A weight value for each sample. Higher values indicate that the sample is more important.|\n", + "|**compute_target**|Indicates the compute used for training. `local` means training runs on the same compute that hosts the Jupyter notebook.
For DSVM and Batch AI please refer to the relevant notebooks.|\n", + "|**show_output**| True/False to turn on/off console output|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_classifier, show_output=True)\n", + "sample_weight_run = sample_weight_experiment.submit(automl_sample_weight, show_output=True)\n", + "\n", + "best_run, fitted_model = local_run.get_output()\n", + "best_run_sample_weight, fitted_model_sample_weight = sample_weight_run.get_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Models\n", + "\n", + "#### Load Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_digits = digits.data[:100, :]\n", + "y_digits = digits.target[:100]\n", + "images = digits.images[:100]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Compare the pipelines\n", + "The prediction from the sample weight model correctly predicts all 4's including one that the model without sample weights does not. However, it also predicts 4 for two images that are not labelled as 4." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Randomly select digits and test\n", + "for index in range(0,len(y_digits)):\n", + " predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n", + " predicted_sample_weight = fitted_model_sample_weight.predict(X_digits[index:index + 1])[0]\n", + " label = y_digits[index]\n", + " if predicted == 4 or predicted_sample_weight == 4 or label == 4:\n", + " title = \"Label value = %d Predicted value = %d Prediced with sample weight = %d\" % ( label,predicted,predicted_sample_weight)\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n", + " plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/12.auto-ml-retrieve-the-training-sdk-versions.ipynb b/automl/12.auto-ml-retrieve-the-training-sdk-versions.ipynb new file mode 100644 index 000000000..359c75c51 --- /dev/null +++ b/automl/12.auto-ml-retrieve-the-training-sdk-versions.ipynb @@ -0,0 +1,241 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 12: Retrieving Training SDK Versions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "import random\n", + "\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn import datasets\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.train.automl import AutoMLConfig\n", + "from azureml.train.automl.run import AutoMLRun\n", + "from azureml.train.automl.utilities import get_sdk_dependencies" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Retrieve the SDK versions in the current env" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To retrieve the SDK versions in the current env, simply run `get_sdk_dependencies()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "get_sdk_dependencies()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. 
Training Model Using AutoML" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for experiment\n", + "experiment_name = 'automl-local-classification'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-local-classification'\n", + "\n", + "experiment=Experiment(ws, experiment_name)\n", + "\n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data=output, index=['']).T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits = datasets.load_digits()\n", + "X_digits = digits.data[10:,:]\n", + "y_digits = digits.target[10:]\n", + "\n", + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log='automl_errors.log',\n", + " primary_metric='AUC_weighted',\n", + " iterations=3,\n", + " n_cross_validations=2,\n", + " verbosity=logging.INFO,\n", + " X = X_digits, \n", + " y = y_digits,\n", + " path=project_folder)\n", + "\n", + "local_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Retrieve the SDK versions from RunHistory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To get the SDK versions from RunHistory, the RunId first needs to be recorded. This can be done either by copying it from the output message or by retrieving it after each run."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_id = local_run.id\n", + "print(run_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Initialize a new `AutoMLRun` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'automl-local-classification'\n", + "#run_id = 'AutoML_c0585b1f-a0e6-490b-84c7-3a099468b28e'\n", + "\n", + "experiment = Experiment(ws, experiment_name)\n", + "ml_run = AutoMLRun(experiment=experiment, run_id=run_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get parent training SDK versions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ml_run.get_run_sdk_dependencies()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the training SDK versions of a specific run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ml_run.get_run_sdk_dependencies(iteration=2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/13.auto-ml-dataprep.ipynb b/automl/13.auto-ml-dataprep.ipynb new file mode 100644 index 000000000..5da708f83 --- /dev/null +++ b/automl/13.auto-ml-dataprep.ipynb @@ -0,0 +1,570 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AutoML 13: Prepare Data using `azureml.dataprep`\n", + "In this example we showcase how you can use `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone - full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n", + "\n", + "Make sure you have executed the [setup](00.configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you would see\n", + "1. Defining data loading and preparation steps in a `Dataflow` using `azureml.dataprep`\n", + "2. Passing the `Dataflow` to AutoML for local run\n", + "3. Passing the `Dataflow` to AutoML for remote run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Install `azureml.dataprep` SDK" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please restart your kernel after the below installs.\n", + "\n", + "Tornado must be downgraded to a pre-5 version due to a known Tornado x Jupyter event loop bug." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade --extra-index-url https://dataprepdownloads.azureedge.net/pypi/monthly-AE98437A2C8F6F45842C/latest azureml-dataprep --no-cache-dir --force-reinstall\n", + "!pip install tornado==4.5.1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagnostics\n", + "\n", + "Opt-in diagnostics for better experience, quality, and security of future releases" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.telemetry import set_diagnostics_collection\n", + "set_diagnostics_collection(send_diagnostics=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "As part of the setup you have already created a Workspace. 
For AutoML you would need to create an Experiment. An Experiment is a named object in a Workspace, which is used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "\n", + "import pandas as pd\n", + "\n", + "import azureml.core\n", + "from azureml.core.compute import DsvmCompute\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.runconfig import CondaDependencies\n", + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.workspace import Workspace\n", + "import azureml.dataprep as dprep\n", + "from azureml.train.automl import AutoMLConfig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + " \n", + "# choose a name for experiment\n", + "experiment_name = 'automl-dataprep-classification'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-dataprep-classification'\n", + " \n", + "experiment=Experiment(ws, experiment_name)\n", + " \n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "pd.DataFrame(data = output, index = ['']).T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading Data using DataPrep" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You can use `smart_read_file` which intelligently figures out delimiters and datatypes of a file\n", + "# data pulled from sklearn.datasets.load_digits()\n", + 
"simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n", + "X = dprep.smart_read_file(simple_example_data_root + 'X.csv').skip(1) # remove header\n", + "\n", + "# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter) \n", + "# and convert column types manually. \n", + "# Here we read a comma delimited file and convert all columns to integers.\n", + "y = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex=True))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Review the Data Preparation Result\n", + "\n", + "You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X.skip(1).head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate AutoML Settings\n", + "\n", + "This creates a general Auto ML Settings applicable for both Local and Remote runs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_settings = {\n", + " \"max_time_sec\": 600,\n", + " \"iterations\": 2,\n", + " \"primary_metric\": 'AUC_weighted',\n", + " \"preprocess\": False,\n", + " \"verbosity\": logging.INFO,\n", + " \"n_cross_validations\" : 3\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Local Run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pass data with Dataflows\n", + "\n", + "The `Dataflow` objects captured above can be passed to `submit` method for local run. AutoML will retrieve the results from the `Dataflow` for model training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " X = X,\n", + " y = y, \n", + " **automl_settings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remote Run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create or Attach a Remote Linux DSVM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dsvm_name = 'mydsvm'\n", + "try:\n", + " dsvm_compute = DsvmCompute(ws, dsvm_name)\n", + " print('found existing dsvm.')\n", + "except:\n", + " print('creating new dsvm.')\n", + " dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n", + " dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n", + " dsvm_compute.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Update Conda Dependency file to have AutoML and DataPrep SDK\n", + "\n", + "Currently AutoML and DataPrep SDK is not installed with Azure ML SDK by default. Due to this we update the conda dependency file to add such dependencies." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd = CondaDependencies()\n", + "cd.set_pip_index_url(index_url=\"--index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/master/588E708E0DF342C4A80BD954289657CF\")\n", + "cd.set_pip_index_url(index_url=\"--extra-index-url https://dataprepdownloads.azureedge.net/pypi/monthly-AE98437A2C8F6F45842C/latest --extra-index-url https://pypi.python.org/simple\")\n", + "cd.remove_pip_package(pip_package=\"azureml-defaults\")\n", + "cd.add_pip_package(pip_package='azureml-core')\n", + "cd.add_pip_package(pip_package='azureml-telemetry')\n", + "cd.add_pip_package(pip_package='azureml-train-automl')\n", + "cd.add_pip_package(pip_package='azureml-dataprep')\n", + "cd.add_pip_package(pip_package='tornado==4.5.1')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a RunConfiguration with DSVM name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_config = RunConfiguration(conda_dependencies=cd)\n", + "run_config.target = dsvm_compute\n", + "run_config.auto_prepare_environment = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pass data with Dataflows\n", + "\n", + "The `Dataflow` objects captured above can also be passed to `submit` method for remote run. AutoML will serialize the `Dataflow` and send to remote compute target. The `Dataflow` will not be evaluated locally." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " path=project_folder,\n", + " run_configuration = run_config,\n", + " X = X,\n", + " y = y,\n", + " **automl_settings)\n", + "remote_run = experiment.submit(automl_config, show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for monitoring runs\n", + "\n", + "The widget will sit on \"loading\" until the first iteration completes, then you will see an auto-updating graph and table show up. It refreshes once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(local_run).show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Retrieve All Child Runs\n", + "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(local_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metricslist[int(properties['iteration'])] = metrics\n", + " \n", + "import pandas as pd\n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "rundata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = local_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any other metric\n", + "Give me the run and the model that has the smallest `log_loss`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"log_loss\"\n", + "best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model based on any iteration\n", + "Give me the run and the model from the 1st iteration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 0\n", + "best_run, fitted_model = local_run.get_output(iteration = 
iteration)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing the Fitted Model \n", + "\n", + "#### Load Test Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import datasets\n", + "\n", + "digits = datasets.load_digits()\n", + "X_digits = digits.data[:10, :]\n", + "y_digits = digits.target[:10]\n", + "images = digits.images[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Testing our best pipeline\n", + "We will try to predict 2 digits and see how our model works." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Randomly select digits and test\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.pyplot import imshow\n", + "import random\n", + "import numpy as np\n", + "\n", + "for index in np.random.choice(len(y_digits), 2):\n", + " print(index)\n", + " predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n", + " label = y_digits[index]\n", + " title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n", + " fig = plt.figure(1, figsize=(3,3))\n", + " ax1 = fig.add_axes((0,0,.8,.8))\n", + " ax1.set_title(title)\n", + " plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Appendix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Capture the Dataflows to use for AutoML later\n", + "\n", + "`Dataflow` objects are immutable. Each of them is composed of a list of data preparation steps. A `Dataflow` can be branched at any point for further usage." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# sklearn.digits.data + target\n", + "digits_complete = dprep.smart_read_file('https://dprepdata.blob.core.windows.net/automl-notebook-data/digits-complete.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`digits_complete` (sourced from `sklearn.datasets.load_digits()`)is forked into `dflow_X` to capture all the feature columns and `dflow_y` to capture the label column." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "digits_complete.to_pandas_dataframe().shape\n", + "labels_column = 'Column64'\n", + "dflow_X = digits_complete.drop_columns(columns=[labels_column])\n", + "dflow_y = digits_complete.keep_columns(columns=[labels_column])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/automl/README.md b/automl/README.md new file mode 100644 index 000000000..c2743277c --- /dev/null +++ b/automl/README.md @@ -0,0 +1,230 @@ +# Table of Contents +1. [Auto ML Introduction](#introduction) +2. [Prerequisites](#prerequisites) +3. [Running samples in Azure Notebooks](#azurenotebooks) +4. [Running samples in a Local Conda environment](#localconda) +5. [Auto ML SDK Sample Notebooks](#samples) +6. [Documentation](#documentation) +7. [Running using python command](#pythoncommand) +8. [Troubleshooting](#troubleshooting) + +# Auto ML Introduction +AutoML builds high quality Machine Learning model for you by automating model selection and hyper parameter selection for you. 
Bring a labelled dataset that you want to build a model for, AutoML will give you a high quality machine learning model that you can use for predictions. + +If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyper parameter selection and in one step creates a high quality trained model for you to use. + +If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing the model selection, hyper parameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models and help you further tune the pipeline if you desire. + +# Prerequisites +### 1. Whitelist your subscription +The first thing you need is to get your subscription whitelisted. Please email your Azure Subscription Id (NOT your alias) to viennawhitelist@service.microsoft.com. Expect to receive response within 2 business days. + +### 2. Enable Your Subscription to access EUAP (optional) +Some SDK functionalities might initially be only available in the Azure Canary Region (eastus2euap, aka EUAP). To gain access to this region, please submit a request here: https://aka.ms/canaryintwhitelist. + +Note it appears that only subscriptions belonging to Microsoft tenant are approved. MSDN-based personal Azure subscriptions appeared to be not allowed. + +# Running samples in Azure Notebooks + +The simplest way to get started with using Auto ML and trying out the sample notebooks is with [Azure Notebooks](https://notebooks.azure.com/). + +### 1. Sign up with Azure Notebooks +- Browse to https://notebooks.azure.com and login using your [Microsoft account](https://account.microsoft.com/account). 
If you are a Microsoft employee you can use your @microsoft account. + +### 2. Create a Library +- Create a new library. This will host the sample notebooks. **Important:** Mark the library private. The default is public. + +### 3. Upload the samples to the Library +- [Download the samples](https://github.com/Azure/ViennaDocs/blob/master/PrivatePreview/notebooks/downloads/auto-ml-scenarios.zip) as zip and extract the contents to a local directory +- Click on **+New** link to Add items to the library and choose to upload **From Computer**. Upload all the files from the zip to the library. + +### 4. Running 00.configuration.ipynb +- Before running any samples you would need to run the configuration notebook. Click on 00.configuration.ipynb notebook +- If asked set the Kernel to Python 3.6 +- Execute the cells in the notebook to install the SDK and create a workspace. (*instructions in notebook*) + +### 5. Running Samples +- Follow the instructions in the individual notebooks to explore various features in AutoML + +# Running samples in a Local Conda environment + +It is best if you create a new conda environment locally to try this SDK, so it doesn't interfere with your existing Python environment. + +### 1. Install mini-conda from [here](https://conda.io/miniconda.html), choose Python 3.7 or higher. +- **Note**: if you already have conda installed, you can keep using it but it must be version 5.2 or later. If you have a previous version installed, you can update it using the command: conda update conda. +There's no need to install mini-conda specifically. + +### 2. Downloading the sample notebooks +- [Download the samples](https://github.com/Azure/ViennaDocs/blob/master/PrivatePreview/notebooks/downloads/auto-ml-scenarios.zip) as zip and extract the contents to a local directory + +### 3. Set up a new conda environment +The automl_setup script creates a new conda environment, installs the necessary packages, configures the widget and starts jupyter notebook.
+It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. It can take about 30 minutes to execute. +## Windows +Start a conda command window, cd to the folder where the sample notebooks were extracted and then run: automl_setup +## Mac +Install "Command line developer tools" if it is not already installed (you can use the command: xcode-select --install). +Start a Terminal window, cd to the folder where the sample notebooks were extracted and then run: bash automl_setup_mac.sh +## Linux +cd to the folder where the sample notebooks were extracted and then run: bash automl_setup_linux.sh + +### 4. Running 00.configuration.ipynb +- Before running any samples you would need to run the configuration notebook. Click on 00.configuration.ipynb notebook +- Please make sure you use the Python [conda env:azure_automl] kernel when running this notebook. +- Execute the cells in the notebook to Register Machine Learning Services Resource Provider and create a workspace. (*instructions in notebook*) + +### 5. Running Samples +- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks. 
+- Follow the instructions in the individual notebooks to explore various features in AutoML + +# Auto ML SDK Sample Notebooks +- [00.configuration.ipynb](00.configuration.ipynb) + - Register Machine Learning Services Resource Provider + - Create new Azure ML Workspace + - Save Workspace configuration file + +- [01.auto-ml-classification.ipynb](01.auto-ml-classification.ipynb) + - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) + - Simple example of using Auto ML for classification + - Uses local compute for training + +- [02.auto-ml-regression.ipynb](02.auto-ml-regression.ipynb) + - Dataset: scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) + - Simple example of using Auto ML for regression + - Uses local compute for training + +- [03.auto-ml-remote-execution.ipynb](03.auto-ml-remote-execution.ipynb) + - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) + - Example of using Auto ML for classification using a remote linux DSVM for training + - Parallel execution of iterations + - Async tracking of progress + - Cancelling individual iterations or entire run + - Retrieving models for any iteration or logged metric + - Specify automl settings as kwargs + +- [03b.auto-ml-remote-batchai.ipynb](03b.auto-ml-remote-batchai.ipynb) + - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) + - Example of using Auto ML for classification using a remote Batch AI compute for training + - Parallel execution of iterations + - Async tracking of progress + - Cancelling individual iterations or entire run + - Retrieving models for any iteration or logged metric + - Specify automl settings as kwargs + +- 
[04.auto-ml-remote-execution-text-data-blob-store.ipynb](04.auto-ml-remote-execution-text-data-blob-store.ipynb) + - Dataset: [Burning Man 2016 dataset](https://innovate.burningman.org/datasets-page/) + - Handling text data with preprocess flag + - Reading data from a blob store for remote executions + - Using pandas dataframes for reading data + +- [05.auto-ml-missing-data-blacklist-early-termination.ipynb](05.auto-ml-missing-data-blacklist-early-termination.ipynb) + - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) + - Blacklist certain pipelines + - Specify a target metric to indicate stopping criteria + - Handling Missing Data in the input + +- [06.auto-ml-sparse-data-custom-cv-split.ipynb](06.auto-ml-sparse-data-custom-cv-split.ipynb) + - Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html) + - Handle sparse datasets + - Specify custom train and validation set + +- [07.auto-ml-exploring-previous-runs.ipynb](07.auto-ml-exploring-previous-runs.ipynb) + - List all projects for the workspace + - List all AutoML Runs for a given project + - Get details for an AutoML Run. (Automl settings, run widget & all metrics) + - Download fitted pipeline for any iteration + +- [08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb](08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb) + - Dataset: scikit learn's [digit dataset](https://innovate.burningman.org/datasets-page/) + - Download the data and store it in the DSVM to improve performance. 
+ +- [09.auto-ml-classification-with-deployment.ipynb](09.auto-ml-classification-with-deployment.ipynb) + - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) + - Simple example of using Auto ML for classification + - Registering the model + - Creating Image and creating aci service + - Testing the aci service + +- [10.auto-ml-multi-output-example.ipynb](10.auto-ml-multi-output-example.ipynb) + - Dataset: scikit learn's [random example using multi-output pipeline](http://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py) + - Simple example of using Auto ML for multi output regression + - Handle both dense and sparse matrices + +- [11.auto-ml-sample-weight.ipynb](11.auto-ml-sample-weight.ipynb) + - How to specify sample_weight + - The difference that it makes to test results + +- [12.auto-ml-retrieve-the-training-sdk-versions.ipynb](12.auto-ml-retrieve-the-training-sdk-versions.ipynb) + - How to get current and training env SDK versions + +- [13.auto-ml-dataprep.ipynb](13.auto-ml-dataprep.ipynb) + - Using DataPrep for reading data + +# Documentation +## Table of Contents +1. [Auto ML Settings](#automlsettings) +2. [Cross validation split options](#cvsplits) +3. [Get Data Syntax](#getdata) + +## Auto ML Settings +|Property|Description|Default| +|-|-|-| +|**primary_metric**|This is the metric that you want to optimize.

Classification supports the following primary metrics
accuracy
AUC_weighted
balanced_accuracy
average_precision_score_weighted
precision_score_weighted

Regression supports the following primary metrics
spearman_correlation
normalized_root_mean_squared_error
r2_score
normalized_mean_absolute_error
normalized_root_mean_squared_log_error| Classification: accuracy

Regression: spearman_correlation +|**max_time_sec**|Time limit in seconds for each iterations|None| +|**iterations**|Number of iterations. In each iteration trains the data with a specific pipeline|25| +|**n_cross_validations**|Number of cross validation splits|None| +|**validation_size**|Size of validation set as percentage of all training samples|None| +|**concurrent_iterations**|Max number of iterations that would be executed in parallel|1| +|**preprocess**|*True/False*
Setting this to *True* enables preprocessing
on the input to handle *missing data*, and perform some common *feature extraction*
*Note: If input data is Sparse you cannot use preprocess=True*|False| +|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.
You can set it to *-1* to use all cores|1| +|**exit_score**|*double* value indicating the target for *primary_metric*.
Once the target is surpassed the run terminates|None| +|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.

Allowed values for **Classification**
logistic regression
SGD classifier
MultinomialNB
BernoulliNB
SVM
LinearSVM
kNN
DT
RF
extra trees
gradient boosting
lgbm_classifier

Allowed values for **Regression**
Elastic net
Gradient boosting regressor
DT regressor
kNN regressor
Lasso lars
SGD regressor
RF regressor
extra trees regressor
lightGBM regressor|None| + +## Cross validation split options +### K-Folds Cross Validation +Use *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold is used once as the validation set. Finally, the average scores across all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set. + +### Monte Carlo Cross Validation (a.k.a. Repeated Random Sub-Sampling) +Use *validation_size* to specify the percentage of the training data set that should be used for validation, and use *n_cross_validations* to specify the number of cross validations. During each cross validation round, a subset of size *validation_size* will be randomly selected for validation of the model trained on the remaining data. Finally, the average scores across all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set. + +### Custom train and validation set +You can specify separate train and validation sets either through get_data() or directly in the fit method. + +## get_data() syntax +The *get_data()* function can be used to return a dictionary with these values: + +|Key|Type|Dependency|Mutually Exclusive with|Description| +|:-|:-|:-|:-|:-| +|X|Pandas Dataframe or Numpy Array|y|data_train, label, columns|All features to train with| +|y|Pandas Dataframe or Numpy Array|X|label|Label data to train with. For classification, this should be an array of integers. | +|X_valid|Pandas Dataframe or Numpy Array|X, y, y_valid|data_train, label|*Optional* All features to validate with. 
If this is not specified, X is split between train and validate| +|y_valid|Pandas Dataframe or Numpy Array|X, y, X_valid|data_train, label|*Optional* The label data to validate with. If this is not specified, y is split between train and validate| +|sample_weight|Pandas Dataframe or Numpy Array|y|data_train, label, columns|*Optional* A weight value for each label. Higher values indicate that the sample is more important.| +|sample_weight_valid|Pandas Dataframe or Numpy Array|y_valid|data_train, label, columns|*Optional* A weight value for each validation label. Higher values indicate that the sample is more important. If this is not specified, sample_weight is split between train and validate| +|data_train|Pandas Dataframe|label|X, y, X_valid, y_valid|All data (features+label) to train with| +|label|string|data_train|X, y, X_valid, y_valid|Which column in data_train represents the label| +|columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features| +|cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation| + +# Running using python command +Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file. +You can then run this file using the python command. +However, on Windows the file needs to be modified before it can be run. +The following condition must be added to the main code in the file: + + if __name__ == "__main__": + +The main code of the file must be indented so that it is under this condition. + +# Troubleshooting +## Iterations fail and the log contains "MemoryError" +This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size. +If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum concurrent iterations. 
For example, if the training data size is 8Gb and concurrent_iterations is set to 10, the minimum memory required is at least 80Gb. +To resolve this issue, allocate a DSVM with more memory or reduce the value specified for concurrent_iterations. + +## Iterations show as "Not Responding" in the RunDetails widget. +This can be caused by too many concurrent iterations for a remote DSVM. Each concurrent iteration usually takes 100% of a core when it is running. Some iterations can use multiple cores. So, the concurrent_iterations setting should always be less than the number of cores of the DSVM. +To resolve this issue, try reducing the value specified for the concurrent_iterations setting. + +## Workspace.create gives the error "The resource type could not be found in the namespace 'Microsoft.MachineLearningServices' for api version '2018-03-01-preview'." +This can indicate that the Azure Subscription has not been whitelisted for AutoML. diff --git a/automl/automl_env.yml b/automl/automl_env.yml new file mode 100644 index 000000000..0d8af0695 --- /dev/null +++ b/automl/automl_env.yml @@ -0,0 +1,22 @@ +name: azure_automl2 +dependencies: + # The python interpreter version. + # Currently Azure ML only supports 3.5.2 and later. +- python=3.6 +- nb_conda +- matplotlib +- seaborn +- numpy>=1.11.0,<1.16.0 +- scipy>=0.19.0,<0.20.0 +- scikit-learn>=0.18.0,<=0.19.1 +- pandas>=0.19.0,<0.23.0 + +- pip: + # Required packages for AzureML execution, history, and data preparation. 
+ - --index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Candidate/604C89A437BA41BD942B4F46D9A3591D + - --extra-index-url https://pypi.python.org/simple + - azureml-sdk[automl] + - azureml-train-widgets + - azure-cli + - pandas_ml + diff --git a/automl/automl_setup.cmd b/automl/automl_setup.cmd new file mode 100644 index 000000000..6d82a9072 --- /dev/null +++ b/automl/automl_setup.cmd @@ -0,0 +1,42 @@ +@echo off +set conda_env_name=%1 + +IF "%conda_env_name%"=="" SET conda_env_name="azure_automl" + +call conda activate %conda_env_name% 2>nul: + +if not errorlevel 1 ( + call conda env update -f automl_env.yml -n %conda_env_name% + if errorlevel 1 goto ErrorExit +) else ( + call conda env create -f automl_env.yml -n %conda_env_name% + if errorlevel 1 goto ErrorExit +) + +call conda activate %conda_env_name% 2>nul: +if errorlevel 1 goto ErrorExit + +call pip install psutil + +call jupyter nbextension install --py azureml.train.widgets +if errorlevel 1 goto ErrorExit + +call jupyter nbextension enable --py azureml.train.widgets +if errorlevel 1 goto ErrorExit + +echo. +echo. +echo *************************************** +echo * AutoML setup completed successfully * +echo *************************************** +echo. +echo Starting jupyter notebook - please run notebook 00.configuration +echo. 
+jupyter notebook --log-level=50 + +goto End + +:ErrorExit +echo Install failed + +:End \ No newline at end of file diff --git a/automl/automl_setup_linux.sh b/automl/automl_setup_linux.sh new file mode 100644 index 000000000..288e09cb7 --- /dev/null +++ b/automl/automl_setup_linux.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +CONDA_ENV_NAME=$1 + +if [ "$CONDA_ENV_NAME" == "" ] +then + CONDA_ENV_NAME="azure_automl" +fi + +if source activate $CONDA_ENV_NAME 2> /dev/null +then + conda env update -f automl_env.yml -n $CONDA_ENV_NAME +else + conda env create -f automl_env.yml -n $CONDA_ENV_NAME && + source activate $CONDA_ENV_NAME && + jupyter nbextension install --py azureml.train.widgets --user && + jupyter nbextension enable --py azureml.train.widgets --user && + echo "" && + echo "" && + echo "***************************************" && + echo "* AutoML setup completed successfully *" && + echo "***************************************" && + echo "" && + echo "Starting jupyter notebook - please run notebook 00.configuration" && + echo "" && + jupyter notebook --log-level=50 +fi + +if [ $? 
-gt 0 ] +then + echo "Installation failed" +fi + + diff --git a/automl/automl_setup_mac.sh b/automl/automl_setup_mac.sh new file mode 100644 index 000000000..6d0049020 --- /dev/null +++ b/automl/automl_setup_mac.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +CONDA_ENV_NAME=$1 + +if [ "$CONDA_ENV_NAME" == "" ] +then + CONDA_ENV_NAME="azure_automl" +fi + +if source activate $CONDA_ENV_NAME 2> /dev/null +then + conda env update -f automl_env.yml -n $CONDA_ENV_NAME +else + conda env create -f automl_env.yml -n $CONDA_ENV_NAME && + source activate $CONDA_ENV_NAME && + conda install lightgbm -c conda-forge -y && + jupyter nbextension install --py azureml.train.widgets --user && + jupyter nbextension enable --py azureml.train.widgets --user && + echo "" && + echo "" && + echo "***************************************" && + echo "* AutoML setup completed successfully *" && + echo "***************************************" && + echo "" && + echo "Starting jupyter notebook - please run notebook 00.configuration" && + echo "" && + jupyter notebook --log-level=50 +fi + +if [ $? 
-gt 0 ] +then + echo "Installation failed" +fi + + diff --git a/tutorials/01.train-models.ipynb b/tutorials/01.train-models.ipynb index 38205bdfe..e8a1e896b 100644 --- a/tutorials/01.train-models.ipynb +++ b/tutorials/01.train-models.ipynb @@ -560,7 +560,7 @@ "metadata": {}, "outputs": [], "source": [ - "run.wait_for_completion(show_output=True) # specify True for a verbose log" + "run.wait_for_completion(show_output=False) # specify True for a verbose log" ] }, { diff --git a/tutorials/02.deploy-models.ipynb b/tutorials/02.deploy-models.ipynb index 616f48e0f..2a92a8db3 100644 --- a/tutorials/02.deploy-models.ipynb +++ b/tutorials/02.deploy-models.ipynb @@ -196,7 +196,7 @@ "\n", "conf_mx = confusion_matrix(y_test, y_hat)\n", "print(conf_mx)\n", - "print('Overall accuracy:', np.average(y_hat == y_test))" + "print('Overall accuracy:', np.average(y_hat==y_test))" ] }, { @@ -313,7 +313,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Review the content of the file" + "Review the content of the `myenv.yml` file." ] }, { @@ -322,7 +322,8 @@ "metadata": {}, "outputs": [], "source": [ - "%pfile myenv.yml" + "with open(\"myenv.yml\",\"r\") as f:\n", + " print(f.read())" ] }, { @@ -383,7 +384,7 @@ " conda_file=\"myenv.yml\")\n", "\n", "service = Webservice.deploy_from_model(workspace=ws,\n", - " name='sklearn-mnist-model',\n", + " name='sklearn-mnist-svc',\n", " deployment_config=aciconfig,\n", " models=[model],\n", " image_config=image_config)\n", @@ -463,6 +464,40 @@ "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also send raw HTTP request to test the web service." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "# send a random row from the test set to score\n", + "random_index = np.random.randint(0, len(X_test)-1)\n", + "input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n", + "\n", + "headers = {'Content-Type':'application/json'}\n", + "\n", + "# for AKS deployment you'd need to the service key in the header as well\n", + "# api_key = service.get_key()\n", + "# headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)} \n", + "\n", + "resp = requests.post(service.scoring_uri, input_data, headers=headers)\n", + "\n", + "print(\"POST to url\", service.scoring_uri)\n", + "#print(\"input data:\", input_data)\n", + "print(\"label:\", y_test[random_index])\n", + "print(\"prediction:\", resp.text)" + ] + }, { "cell_type": "markdown", "metadata": {},