diff --git a/00.Getting Started/01.train-within-notebook/01.train-within-notebook.ipynb b/00.Getting Started/01.train-within-notebook/01.train-within-notebook.ipynb new file mode 100644 index 000000000..9dabc23c6 --- /dev/null +++ b/00.Getting Started/01.train-within-notebook/01.train-within-notebook.ipynb @@ -0,0 +1,864 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 01. Train in the Notebook & Deploy Model to ACI\n", + "\n", + "* Load workspace\n", + "* Train a simple regression model directly in the Notebook python kernel\n", + "* Record run history\n", + "* Find the best model in run history and download it\n", + "* Deploy the model as an Azure Container Instance (ACI)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "1. Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't. \n", + "\n", + "2. Install the following prerequisite libraries into your conda environment and restart the notebook.\n", + "```shell\n", + "(myenv) $ conda install -y matplotlib tqdm scikit-learn\n", + "```\n", + "\n", + "3. Check that ACI is registered for your Azure Subscription. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!az provider show -n Microsoft.ContainerInstance -o table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If ACI is not registered, run the following command to register it. Note that you have to be a subscription owner, or this command will fail." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!az provider register -n Microsoft.ContainerInstance" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validate Azure ML SDK installation and get version number for debugging purposes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "install" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Experiment, Run, Workspace\n", + "import azureml.core\n", + "\n", + "# Check core SDK version number\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set experiment name\n", + "Choose a name for the experiment."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'train-in-notebook'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start a training run in local Notebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load diabetes dataset, a well-known small dataset that comes with scikit-learn\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.externals import joblib\n", + "\n", + "X, y = load_diabetes(return_X_y = True)\n", + "columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n", + "data = {\n", + " \"train\":{\"X\": X_train, \"y\": y_train}, \n", + " \"test\":{\"X\": X_test, \"y\": y_test}\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train a simple Ridge model\n", + "Train a very simple Ridge regression model in scikit-learn, and save it as a pickle file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "reg = Ridge(alpha = 0.03)\n", + "reg.fit(data['train']['X'], data['train']['y'])\n", + "preds = reg.predict(data['test']['X'])\n", + "print('Mean Squared Error is', mean_squared_error(preds, data['test']['y']))\n", + "joblib.dump(value = reg, filename = 'model.pkl');" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add experiment tracking\n", + "Now, let's add Azure ML experiment logging, and upload the persisted model into the run record as well." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "local run", + "outputs upload" + ] + }, + "outputs": [], + "source": [ + "experiment = Experiment(workspace = ws, name = experiment_name)\n", + "run = experiment.start_logging()\n", + "run.tag(\"Description\", \"My first run!\")\n", + "run.log('alpha', 0.03)\n", + "reg = Ridge(alpha = 0.03)\n", + "reg.fit(data['train']['X'], data['train']['y'])\n", + "preds = reg.predict(data['test']['X'])\n", + "run.log('mse', mean_squared_error(preds, data['test']['y']))\n", + "joblib.dump(value = reg, filename = 'model.pkl')\n", + "run.upload_file(name = 'outputs/model.pkl', path_or_stream = './model.pkl')\n", + "\n", + "run.complete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can browse to the recorded run. Please make sure you use Chrome to navigate the run history page." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simple parameter sweep\n", + "Sweep over alpha values of a sklearn ridge model, and capture metrics and the trained model in the Azure ML experiment."
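+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a quick sanity check, you can preview the alpha grid the sweep will cover. This is a minimal sketch of the same `np.arange` call used in the loop below; note that `np.arange` excludes the stop value, so the grid runs from 0.00 to 0.95." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "# preview the alpha values the parameter sweep will try (the stop value 1.0 is excluded)\n", + "print(np.arange(0.0, 1.0, 0.05))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The loop below creates one child run per alpha value:"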
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "local run", + "outputs upload" + ] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import os\n", + "from tqdm import tqdm\n", + "\n", + "model_name = \"model.pkl\"\n", + "\n", + "# start a training run\n", + "root_run = experiment.start_logging()\n", + "\n", + "# list of alpha values from 0.00 to 0.95 with a 0.05 interval\n", + "alphas = np.arange(0.0, 1.0, 0.05)\n", + "\n", + "# try a range of alpha values in a Ridge regression model\n", + "for alpha in tqdm(alphas):\n", + " # create a child run for each alpha value\n", + " with root_run.child_run(\"alpha-\" + str(alpha)) as run:\n", + " # Use Ridge algorithm to build a regression model\n", + " reg = Ridge(alpha=alpha)\n", + " reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n", + " preds = reg.predict(data[\"test\"][\"X\"])\n", + " mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", + "\n", + " # log alpha, mean_squared_error and feature names in run history\n", + " run.log(\"alpha\", alpha)\n", + " run.log(\"mse\", mse)\n", + " run.log_list(\"columns\", columns)\n", + "\n", + " with open(model_name, \"wb\") as file:\n", + " joblib.dump(value=reg, filename=file)\n", + " \n", + " # upload the serialized model into run history record\n", + " run.upload_file(name=\"outputs/\" + model_name, path_or_stream=model_name)\n", + "\n", + " # now delete the serialized model from the local folder since it is already uploaded to run history \n", + " os.remove(model_name)\n", + " \n", + "# Declare run completed\n", + "root_run.complete()\n", + "root_run_id = root_run.id\n", + "print(\"run id:\", root_run.id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you can reconstruct this run object from the captured run id in a different Notebook session." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "rr = Run(experiment=experiment, run_id=root_run_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Select best model from the experiment\n", + "Load all child run metrics recursively from the experiment into a dictionary object."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history", + "get metrics" + ] + }, + "outputs": [], + "source": [ + "child_run_metrics = rr.get_metrics(recursive=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now find the run with the lowest Mean Squared Error value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run_id = min(child_run_metrics, key = lambda k: child_run_metrics[k]['mse'])\n", + "best_run = Run(experiment=experiment, run_id=best_run_id)\n", + "print('Best run is:', best_run_id)\n", + "print('Metrics:', child_run_metrics[best_run_id])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can add tags to your runs to make them easier to catalog." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "best_run.tag(key=\"Description\", value=\"The best one\")\n", + "best_run.get_tags()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot MSE over alpha\n", + "\n", + "Let's observe the best model visually by plotting the MSE values over alpha values:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "\n", + "best_alpha = child_run_metrics[best_run_id]['alpha']\n", + "min_mse = child_run_metrics[best_run_id]['mse']\n", + "\n", + "alpha_mse = np.array([(child_run_metrics[k]['alpha'], child_run_metrics[k]['mse']) for k in child_run_metrics.keys()])\n", + "sorted_alpha_mse = alpha_mse[alpha_mse[:,0].argsort()]\n", + "\n", + "plt.plot(sorted_alpha_mse[:,0], sorted_alpha_mse[:,1], 'r--')\n", + "plt.plot(sorted_alpha_mse[:,0], sorted_alpha_mse[:,1], 'bo')\n", + "\n", + "plt.xlabel('alpha', fontsize = 14)\n", + "plt.ylabel('mean squared error', fontsize = 14)\n", + "plt.title('MSE over alpha', fontsize = 16)\n", + "\n", + "# plot arrow\n", + "plt.arrow(x = best_alpha, y = min_mse + 39, dx = 0, dy = -26, ls = '-', lw = 0.4,\n", + " width = 0, head_width = .03, head_length = 8)\n", + "\n", + "# plot \"best run\" text\n", + "plt.text(x = best_alpha - 0.08, y = min_mse + 50, s = 'Best Run', fontsize = 14)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register the best model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Find the model file saved in the run record of the best run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "for f in best_run.get_file_names():\n", + " print(f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can register this model in the model registry of the workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "register model from history" + ] + }, + "outputs": [], + "source": [ + "model = best_run.register_model(model_name='best_model', model_path='outputs/model.pkl')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verify that the model has been registered properly. If you have done this several times, you'll see the version number auto-increment each time."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "register model from history" + ] + }, + "outputs": [], + "source": [ + "for m in ws.models(name='best_model'):\n", + " print(m.name, m.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also download the registered model. Afterwards, you should see a `model.pkl` file in the current directory. You can then use it for local testing if you'd like." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "download file" + ] + }, + "outputs": [], + "source": [ + "model.download(target_dir='.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create scoring script\n", + "\n", + "The scoring script consists of two functions: `init`, which loads the model into memory when the container starts, and `run`, which makes the prediction when the web service is called." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `%%writefile` cell magic is used to write the scoring function to a local file. Pay special attention to how the model is loaded in the `init()` function. When the Docker image is built for this model, the actual model file is downloaded and placed on disk, and the `Model.get_model_path()` function returns the local path where the model is placed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile score.py\n", + "import pickle\n", + "import json\n", + "import numpy as np\n", + "from sklearn.externals import joblib\n", + "from sklearn.linear_model import Ridge\n", + "from azureml.core.model import Model\n", + "\n", + "\n", + "def init():\n", + " global model\n", + " # note here \"best_model\" is the name of the model registered under the workspace\n", + " # this call should return the path to the model.pkl file on the local disk.\n", + " model_path = Model.get_model_path(model_name='best_model')\n", + " # deserialize the model file back into a sklearn model\n", + " model = joblib.load(model_path)\n", + "\n", + " \n", + "# note you can pass in multiple rows for scoring\n", + "def run(raw_data):\n", + " try:\n", + " data = json.loads(raw_data)['data']\n", + " data = np.array(data)\n", + " result = model.predict(data)\n", + " return json.dumps({\"result\": result.tolist()})\n", + " except Exception as e:\n", + " result = str(e)\n", + " return json.dumps({\"error\": result})\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create conda dependency file\n", + "\n", + "This `myenv.yml` file is used to specify which library dependencies to install on the web service. Note that the CondaDependencies API automatically adds the necessary Azure ML dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.conda_dependencies import CondaDependencies \n", + "\n", + "myenv = CondaDependencies()\n", + "myenv.add_conda_package(\"scikit-learn\")\n", + "\n", + "with open(\"myenv.yml\", \"w\") as f:\n", + " f.write(myenv.serialize_to_string())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "View the `myenv.yml` file that was written."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pycat myenv.yml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deploy web service into an Azure Container Instance\n", + "The deployment process takes the registered model and your scoring script, and builds a Docker image. It then deploys the Docker image into Azure Container Instance as a running container with an HTTP endpoint ready for scoring calls. Read more about [Azure Container Instance](https://azure.microsoft.com/en-us/services/container-instances/).\n", + "\n", + "Note that ACI is great for quick and cost-effective dev/test deployment scenarios. For production workloads, please use [Azure Kubernetes Service (AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service/) instead. Please follow the instructions in [this notebook](11.production-deploy-to-aks.ipynb) to see how that can be done from Azure ML.\n", + " \n", + "**Note:** The web service creation can take 6-7 minutes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.webservice import AciWebservice, Webservice\n", + "\n", + "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", + " memory_gb=1, \n", + " tags={'sample name': 'AML 101'}, \n", + " description='This is a great example.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note the below `Webservice.deploy_from_model()` function takes a model object registered under the workspace. It then bakes the model file into the Docker image so it can be looked up using the `Model.get_model_path()` function in `score.py`. \n", + "\n", + "If you have a local model file instead of a registered model object, you can also use the `Webservice.deploy()` function, which would register the model and then deploy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.image import ContainerImage\n", + "image_config = ContainerImage.image_configuration(execution_script=\"score.py\", \n", + " runtime=\"python\", \n", + " conda_file=\"myenv.yml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "%%time\n", + "# this will take 5-10 minutes to finish\n", + "# you can also use the \"az container list\" command to find the ACI being deployed\n", + "service = Webservice.deploy_from_model(name='my-aci-svc',\n", + " deployment_config=aciconfig,\n", + " models=[model],\n", + " image_config=image_config,\n", + " workspace=ws)\n", + "\n", + "service.wait_for_deployment(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Test web service" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "print('web service is hosted in ACI:', service.scoring_uri)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the `run` API to call the web service with one row of data to get a prediction."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "import json\n", + "# score the first row from the test set.\n", + "test_samples = json.dumps({\"data\": X_test[0:1, :].tolist()})\n", + "service.run(input_data = test_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Feed the entire test set and calculate the errors (residual values)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "# score the entire test set.\n", + "test_samples = json.dumps({'data': X_test.tolist()})\n", + "\n", + "result = json.loads(service.run(input_data = test_samples))['result']\n", + "residual = result - y_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also send a raw HTTP request to test the web service." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "# 2 rows of input data, each with 10 made-up numerical features\n", + "input_data = \"{\\\"data\\\": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]]}\"\n", + "\n", + "headers = {'Content-Type':'application/json'}\n", + "\n", + "# for AKS deployment you'd need to include the service key in the header as well\n", + "# api_key = service.get_key()\n", + "# headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)} \n", + "\n", + "resp = requests.post(service.scoring_uri, input_data, headers = headers)\n", + "print(resp.text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Residual graph\n", + "Plot a residual value graph to chart the errors on the entire test set. Observe the nice bell curve." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "f, (a0, a1) = plt.subplots(1, 2, gridspec_kw={'width_ratios':[3, 1], 'wspace':0, 'hspace': 0})\n", + "f.suptitle('Residual Values', fontsize = 18)\n", + "\n", + "f.set_figheight(6)\n", + "f.set_figwidth(14)\n", + "\n", + "a0.plot(residual, 'bo', alpha=0.4);\n", + "a0.plot([0,90], [0,0], 'r', lw=2)\n", + "a0.set_ylabel('residual values', fontsize=14)\n", + "a0.set_xlabel('test data set', fontsize=14)\n", + "\n", + "a1.hist(residual, orientation='horizontal', color='blue', bins=10, histtype='step');\n", + "a1.hist(residual, orientation='horizontal', color='blue', alpha=0.2, bins=10);\n", + "a1.set_yticklabels([])\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Delete ACI to clean up" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Deleting ACI is super fast!"
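+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before deleting, you can optionally list the web services deployed in the workspace to confirm what is about to be removed. This is a hedged sketch: it assumes `Webservice.list()` and the `state` attribute are available in your SDK version." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.webservice import Webservice\n", + "\n", + "# list all web services currently deployed in the workspace\n", + "for svc in Webservice.list(ws):\n", + " print(svc.name, svc.state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now delete the service:"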
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "%%time\n", + "service.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/02.train-on-local/02.train-on-local.ipynb b/00.Getting Started/02.train-on-local/02.train-on-local.ipynb new file mode 100644 index 000000000..e3ce4f9c0 --- /dev/null +++ b/00.Getting Started/02.train-on-local/02.train-on-local.ipynb @@ -0,0 +1,432 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 02. Train locally\n", + "* Create or load workspace.\n", + "* Create scripts locally.\n", + "* Create `train.py` in a folder, along with a `mylib.py` file.\n", + "* Configure & execute a local run in a user-managed Python environment.\n", + "* Configure & execute a local run in a system-managed Python environment.\n", + "* Configure & execute a local run in a Docker environment.\n", + "* Query run metrics to find the best model.\n", + "* Register model for operationalization." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create An Experiment\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'train-on-local'\n", + "exp = Experiment(workspace=ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a folder to store the training script."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "script_folder = './samples/train-on-local'\n", + "os.makedirs(script_folder, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create `train.py`\n", + "\n", + "Use the `%%writefile` magic to write training code to the `train.py` file under your script folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $script_folder/train.py\n", + "\n", + "import os\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from azureml.core.run import Run\n", + "from sklearn.externals import joblib\n", + "\n", + "# example of referencing another script\n", + "import mylib\n", + "\n", + "X, y = load_diabetes(return_X_y=True)\n", + "\n", + "run = Run.get_submitted_run()\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", + " \"test\": {\"X\": X_test, \"y\": y_test}}\n", + "\n", + "# example of referencing another script\n", + "alphas = mylib.get_alphas()\n", + "\n", + "for alpha in alphas:\n", + " # Use Ridge algorithm to create a regression model\n", + " reg = Ridge(alpha=alpha)\n", + " reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n", + "\n", + " preds = reg.predict(data[\"test\"][\"X\"])\n", + " mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", + " run.log('alpha', alpha)\n", + " run.log('mse', mse)\n", + "\n", + " model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n", + " # serialize the model to a local file\n", + " with open(model_file_name, \"wb\") as file:\n", + " joblib.dump(value=reg, filename=file)\n", + " \n", + " # upload the model file explicitly into artifacts \n", + " run.upload_file(name=model_file_name, path_or_stream=model_file_name)\n", + " \n", + " # register the model\n", + " run.register_model(model_name='diabetes-model', model_path=model_file_name)\n", + "\n", + " print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`train.py` also references a `mylib.py` file. So let's create that too." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $script_folder/mylib.py\n", + "import numpy as np\n", + "\n", + "def get_alphas():\n", + " # list of alpha values from 0.00 to 0.95 with a 0.05 interval\n", + " return np.arange(0.0, 1.0, 0.05)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure & Run\n", + "### User-managed environment\n", + "Below, we use a user-managed run, which means you are responsible for ensuring all the necessary packages are available in the Python environment you choose to run the script."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "\n", + "# Edit a run configuration property on the fly.\n", + "run_config_user_managed = RunConfiguration()\n", + "\n", + "run_config_user_managed.environment.python.user_managed_dependencies = True\n", + "\n", + "# You can choose a specific Python environment by pointing to a Python path \n", + "#run_config_user_managed.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Submit script to run in the user-managed environment\n", + "Note the whole script folder is submitted for execution, including the `mylib.py` file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import ScriptRunConfig\n", + "\n", + "src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_user_managed)\n", + "run = exp.submit(src)\n", + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Get run history details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### System-managed environment\n", + "You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "run_config_system_managed = RunConfiguration()\n", + "\n", + "run_config_system_managed.environment.python.user_managed_dependencies = False\n", + "run_config_system_managed.prepare_environment = True\n", + "\n", + "# Specify conda dependencies with scikit-learn\n", + "cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n", + "run_config_system_managed.environment.python.conda_dependencies = cd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Submit script to run in the system-managed environment\n", + "A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda dependencies."
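+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before submitting, you can optionally inspect the conda environment specification the system will build. This is a minimal sketch that reuses the same `serialize_to_string()` call shown for `myenv.yml` in the 01 notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# optional sanity check: print the conda environment specification for this run config\n", + "print(run_config_system_managed.environment.python.conda_dependencies.serialize_to_string())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now submit the run:"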
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_system_managed)\n", + "run = exp.submit(src)\n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Get run history details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query run metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history", + "get metrics" + ] + }, + "outputs": [], + "source": [ + "# get all metrics logged in the run\n", + "metrics = run.get_metrics()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's find the model that has the lowest MSE value logged." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]\n", + "\n", + "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n", + " min(metrics['mse']), \n", + " best_alpha\n", + "))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also list all the files that are associated with this run record." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_file_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We know the model `ridge_0.40.pkl` is the best-performing model from the earlier queries. So let's register it with the workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# supply a model name, and the full path to the serialized model file.\n", + "model = run.register_model(model_name='best_ridge_model', model_path='ridge_0.40.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(model.name, model.version, model.url)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now you can deploy this model following the example in the 01 notebook."
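+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a quick local check before deploying, you can fetch the registered model back and score a made-up row. This is a hedged sketch: it assumes `Model(workspace, name)` resolves to the latest registered version, and that `ridge_0.40.pkl` is the file name the download produces, as registered above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from azureml.core.model import Model\n", + "from sklearn.externals import joblib\n", + "\n", + "# download the latest registered version of the model and deserialize it\n", + "m = Model(workspace=ws, name='best_ridge_model')\n", + "m.download(target_dir='.')\n", + "local_model = joblib.load('ridge_0.40.pkl')\n", + "\n", + "# score one made-up row with the 10 features the diabetes data uses\n", + "print(local_model.predict(np.zeros((1, 10))))"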
+ ] + } + ], + "metadata": { + "celltoolbar": "Edit Metadata", + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/02.train-on-local/train.py b/00.Getting Started/02.train-on-local/train.py new file mode 100644 index 000000000..5eba27eb2 --- /dev/null +++ b/00.Getting Started/02.train-on-local/train.py @@ -0,0 +1,45 @@ +from sklearn.datasets import load_diabetes +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split +from azureml.core.run import Run +from sklearn.externals import joblib + +import numpy as np + +# os.makedirs('./outputs', exist_ok = True) + +X, y = load_diabetes(return_X_y=True) + +run = Run.get_submitted_run() + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) +data = {"train": {"X": X_train, "y": y_train}, + "test": {"X": X_test, "y": y_test}} + +# list of alpha values from 0.00 to 0.95 with a 0.05 interval +alphas = np.arange(0.0, 1.0, 0.05) + +for alpha in alphas: + # Use Ridge algorithm to create a regression model + reg = Ridge(alpha=alpha) + reg.fit(data["train"]["X"], data["train"]["y"]) + + preds = reg.predict(data["test"]["X"]) + mse = mean_squared_error(preds, data["test"]["y"]) + run.log('alpha', alpha) + run.log('mse', mse) + + model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha) + # serialize the model to a local file + with open(model_file_name, "wb") as file: + joblib.dump(value=reg, filename=file) + + # upload the model file explicitly into artifacts + run.upload_file(name=model_file_name, path_or_stream=model_file_name) + + # register the model + # commented out for now until a bug is fixed + # run.register_model(file_name = model_file_name) + + print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse)) diff --git a/00.Getting Started/03.train-on-aci/03.train-on-aci.ipynb b/00.Getting Started/03.train-on-aci/03.train-on-aci.ipynb new file mode 100644 index 000000000..1c59bd985 --- /dev/null +++ b/00.Getting Started/03.train-on-aci/03.train-on-aci.ipynb @@ -0,0 +1,342 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 03. Train on Azure Container Instance (EXPERIMENTAL)\n", + "\n", + "* Load workspace\n", + "* Create Experiment\n", + "* Create `train.py` in the script folder\n", + "* Configure an ACI (Azure Container Instance) run\n", + "* Execute in ACI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create An Experiment\n", + "\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'train-on-aci'\n", + "experiment = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a folder to store the training script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "script_folder = './samples/train-on-aci'\n", + "os.makedirs(script_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remote execution on ACI\n", + "\n", + "Use the `%%writefile` magic to write training code to the `train.py` file under the script folder."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $script_folder/train.py\n", + "\n", + "import os\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from azureml.core.run import Run\n", + "from sklearn.externals import joblib\n", + "\n", + "import numpy as np\n", + "\n", + "os.makedirs('./outputs', exist_ok=True)\n", + "\n", + "X, y = load_diabetes(return_X_y = True)\n", + "\n", + "run = Run.get_submitted_run()\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n", + "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", + " \"test\": {\"X\": X_test, \"y\": y_test}}\n", + "\n", + "# list of alpha values from 0.00 to 0.95 with a 0.05 interval\n", + "alphas = np.arange(0.0, 1.0, 0.05)\n", + "\n", + "for alpha in alphas:\n", + " # Use Ridge algorithm to create a regression model\n", + " reg = Ridge(alpha = alpha)\n", + " reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n", + "\n", + " preds = reg.predict(data[\"test\"][\"X\"])\n", + " mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", + " run.log('alpha', alpha)\n", + " run.log('mse', mse)\n", + " \n", + " model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n", + " # save the model under ./outputs so it is uploaded automatically with the run record\n", + " with open(os.path.join('./outputs', model_file_name), \"wb\") as file:\n", + " joblib.dump(value = reg, filename = file)\n", + "\n", + " print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure for using ACI\n", + "Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "# create a new runconfig object\n", + "run_config = RunConfiguration()\n", + "\n", + "# signal that you want to use ACI to execute the script.\n", + "run_config.target = \"containerinstance\"\n", + "\n", + "# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n", + "run_config.container_instance.region = 'eastus'\n", + "\n", + "# set the ACI CPU and Memory \n", + "run_config.container_instance.cpu_cores = 1\n", + "run_config.container_instance.memory_gb = 2\n", + "\n", + "# enable Docker \n", + "run_config.environment.docker.enabled = True\n", + "\n", + "# set Docker base image to the default CPU-based image\n", + "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", + "#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n", + "\n", + "# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "# auto-prepare the Docker image when used for execution (if it is not already prepared)\n", + "run_config.auto_prepare_environment = True\n", + "\n", + "# specify CondaDependencies obj\n", + "run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit the Experiment\n", + "Finally, run the training job on ACI." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "aci" + ] + }, + "outputs": [], + "source": [ + "%%time \n", + "from azureml.core.script_run_config import ScriptRunConfig\n", + "\n", + "script_run_config = ScriptRunConfig(source_directory = script_folder,\n", + " script = 'train.py',\n", + " run_config = run_config)\n", + "\n", + "run = experiment.submit(script_run_config)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "aci" + ] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Shows output of the run on stdout.\n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "# Show run details\n", + "\n", + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Navigate to the above URL using Chrome, and you should see a graph of alpha values, and a graph of MSE.\n", + "\n", + "![graphs](../images/mse.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "get metrics" + ] + }, + "outputs": [], + "source": [ + "# get all metrics logged in the run\n", + "metrics = run.get_metrics()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n", + " min(metrics['mse']), \n", + " metrics['alpha'][np.argmin(metrics['mse'])]\n", + "))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [],
"source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb b/00.Getting Started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb new file mode 100644 index 000000000..59ba86fde --- /dev/null +++ b/00.Getting Started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb @@ -0,0 +1,347 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 04. Train in a remote VM (MLC managed DSVM)\n", + "* Create Workspace\n", + "* Create Project\n", + "* Create `train.py` file\n", + "* Create DSVM as Machine Learning Compute (MLC) resource\n", + "* Configure & execute a run in a conda environment in the default miniconda Docker container on DSVM" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'train-on-remote-vm'\n", + "script_folder = './samples/train-on-remote-vm'\n", + "\n", + "import os\n", + "os.makedirs(script_folder, exist_ok = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "exp = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create `train.py`\n", + "\n", + "Use `%%writefile` magic to write training code to `train.py` file under your project folder." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $script_folder/train.py\n", + "\n", + "import os\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from azureml.core.run import Run\n", + "from sklearn.externals import joblib\n", + "\n", + "import numpy as np\n", + "\n", + "os.makedirs('./outputs', exist_ok=True)\n", + "\n", + "X, y = load_diabetes(return_X_y = True)\n", + "\n", + "run = Run.get_submitted_run()\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n", + "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", + " \"test\": {\"X\": X_test, \"y\": y_test}}\n", + "\n", + "# list of alpha values from 0.00 to 0.95 with a 0.05 interval\n", + "alphas = np.arange(0.0, 1.0, 0.05)\n", + "\n", + "for alpha in alphas:\n", + " # Use Ridge algorithm to create a regression model\n", + " reg = Ridge(alpha = alpha)\n", + " reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n", + "\n", + " preds = reg.predict(data[\"test\"][\"X\"])\n", + " mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", + " run.log('alpha', alpha)\n", + " run.log('mse', mse)\n", + " \n", + " model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n", + " # save the model under ./outputs so it is uploaded automatically with the run record\n", + " with open(os.path.join('./outputs', model_file_name), \"wb\") as file:\n", + " joblib.dump(value = reg, filename = file)\n", + "\n", + " print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Linux DSVM as a compute target\n", + "\n", + "**Note**: If creation fails with a message about Marketplace purchase eligibility, go to portal.azure.com, start creating a DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating the VM.\n", + " \n", + "**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import DsvmCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "compute_target_name = 'mydsvm'\n", + "\n", + "try:\n", + " dsvm_compute = DsvmCompute(workspace = ws, name = compute_target_name)\n", + " print('found existing:', dsvm_compute.name)\n", + "except ComputeTargetException:\n", + " print('creating new.')\n", + " dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n", + " dsvm_compute = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n", + " dsvm_compute.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure & Run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure a Docker run with new conda environment on the VM\n", + "You can execute in a Docker container in the VM. If you choose this route, you don't need to install anything on the VM yourself. The Azure ML execution service will take care of it for you."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "\n", + "# Create a new run configuration for the Python framework\n", + "run_config = RunConfiguration(framework = \"python\")\n", + "\n", + "# Set compute target to the Linux DSVM\n", + "run_config.target = compute_target_name\n", + "\n", + "# Use Docker in the remote VM\n", + "run_config.environment.docker.enabled = True\n", + "\n", + "# Use CPU base image from DockerHub\n", + "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", + "print('Base Docker image is:', run_config.environment.docker.base_image)\n", + "\n", + "# Ask the system to build a new conda environment based on the conda_dependencies.yml file\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "# Prepare the Docker and conda environment automatically when executing for the first time.\n", + "run_config.prepare_environment = True\n", + "\n", + "# specify CondaDependencies obj\n", + "run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the Experiment\n", + "Submit the script to run in the Docker image in the remote VM. If you run this for the first time, the system will download the base image, layer in the packages specified in the `conda_dependencies.yml` file on top of the base image, create a container and then execute the script in the container." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Run\n", + "from azureml.core import ScriptRunConfig\n", + "\n", + "src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_config)\n", + "run = exp.submit(src)\n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### View run history details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find the best run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get all metrics logged in the run\n", + "metrics = run.get_metrics()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n", + " min(metrics['mse']), \n", + " metrics['alpha'][np.argmin(metrics['mse'])]\n", + "))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up compute resource" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dsvm_compute.delete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python",
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/04.train-on-remote-vm/train.py b/00.Getting Started/04.train-on-remote-vm/train.py new file mode 100644 index 000000000..e140e85f6 --- /dev/null +++ b/00.Getting Started/04.train-on-remote-vm/train.py @@ -0,0 +1,39 @@ + +import os +from sklearn.datasets import load_diabetes +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split +from azureml.core import Run +from sklearn.externals import joblib + +import numpy as np + +os.makedirs('./outputs', exist_ok=True) + +X, y = load_diabetes(return_X_y=True) + +run = Run.get_submitted_run() + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) +data = {"train": {"X": X_train, "y": y_train}, + "test": {"X": X_test, "y": y_test}} + +# list of numbers from 0.0 to 1.0 with a 0.05 interval +alphas = np.arange(0.0, 1.0, 0.05) + +for alpha in alphas: + # Use Ridge algorithm to create a regression model + reg = Ridge(alpha=alpha) + reg.fit(data["train"]["X"], data["train"]["y"]) + + preds = reg.predict(data["test"]["X"]) + mse = mean_squared_error(preds, data["test"]["y"]) + run.log('alpha', alpha) + run.log('mse', mse) + + model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha) + with open(model_file_name, "wb") as file: + joblib.dump(value=reg, filename='outputs/' + model_file_name) + + print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse)) diff --git a/00.Getting Started/05.train-in-spark/05.train-in-spark.ipynb b/00.Getting Started/05.train-in-spark/05.train-in-spark.ipynb new file mode 100644 index 000000000..83ad121e4 --- /dev/null +++ b/00.Getting Started/05.train-in-spark/05.train-in-spark.ipynb @@ -0,0 +1,470 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 05. Train in Spark\n", + "* Create Workspace\n", + "* Create Project\n", + "* Create `train-spark.py` file in the project folder\n", + "* Execute a PySpark script in ACI.\n", + "* Execute a PySpark script in a Docker container on remote DSVM\n", + "* Execute a PySpark script in HDI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Project and Associate with Run History\n", + "**Project** is a local folder that contains files for your Azure ML experiments. It is associated with a **run history**, a cloud container of run metrics and output artifacts from your experiments. You can either attach a local folder as a new project, or load a local folder as a project if it has been attached before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# choose a name for the run history container in the workspace\n", + "experiment_name = 'train-on-spark'\n", + "\n", + "# project folder\n", + "project_folder = './sample_projects/train-on-spark'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from azureml.project.project import Project\n", + "\n", + "project = Project.attach(workspace_object = ws,\n", + " experiment_name = experiment_name,\n", + " directory = project_folder)\n", + "\n", + "print(project.project_directory, project.history.name, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Copy files\n", + "\n", + "\n", + "Copy `train-spark.py` and `iris.csv` into the project folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from shutil import copyfile\n", + "\n", + "# copy the iris dataset into the project folder\n", + "copyfile('./iris.csv', os.path.join(project_folder, 'iris.csv'))\n", + "\n", + "# copy the train-spark.py file into the project folder\n", + "# train-spark.py trains a simple LogisticRegression model using the Spark ML library\n", + "copyfile('./train-spark.py', os.path.join(project_folder, 'train-spark.py'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Review the train-spark.py file in the project folder." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(os.path.join(project_folder, 'train-spark.py'), 'r') as fin:\n", + " print(fin.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure & Run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure ACI target" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "# create a new runconfig object\n", + "run_config = RunConfiguration()\n", + "\n", + "# signal that you want to use ACI to execute the script.\n", + "run_config.target = \"containerinstance\"\n", + "\n", + "# ACI container group is only supported in certain regions, which can be different from the region the Workspace is in.\n", + "run_config.container_instance.region = 'eastus'\n", + "\n", + "# set the ACI CPU and Memory \n", + "run_config.container_instance.cpu_cores = 1\n", + "run_config.container_instance.memory_gb = 2\n", + "\n", + "# enable Docker \n", + "run_config.environment.docker.enabled = True\n", + "\n", + "# set Docker base image to the default CPU-based image\n", + "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_MMLSPARK_CPU_IMAGE\n", + "print('base image is', run_config.environment.docker.base_image)\n", + "#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n", + "\n", + "# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n", + "# please update this file if you need additional packages.\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "# auto-prepare the Docker image when used for execution (if it is not already prepared)\n", + "run_config.auto_prepare_environment = True\n", + "\n", + "cd = CondaDependencies()\n", + "# add numpy as a dependency\n", + "cd.add_conda_package('numpy')\n", + "# overwrite the default conda_dependencies.yml file\n", + "cd.save_to_file(base_directory = project_folder, conda_file_path='aml_config/conda_dependencies.yml')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run Spark job in ACI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time \n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.script_run_config import ScriptRunConfig\n", + "\n", + "experiment = Experiment(workspace = ws, name = experiment_name)\n", + "script_run_config = ScriptRunConfig(source_directory = project.project_directory,\n", + " script= 'train-spark.py',\n", + " run_config = run_config)\n", + "run = experiment.submit(script_run_config)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Show the run in the web UI\n", + "**IMPORTANT**: Please use Chrome to navigate to the URL." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import helpers.py\n", + "import helpers\n", + "\n", + "# get the URL of the run history web page\n", + "print(helpers.get_run_history_url(run))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Attach a remote Linux VM\n", + "To use a remote Docker compute target:\n", + " 1. Create a Linux DSVM in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n", + " 2. Enter the IP address, username and password below\n", + " \n", + "**Note**: the example below uses port 5022. By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like in the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import RemoteCompute\n", + "from azureml.exceptions import UserErrorException\n", + "\n", + "try:\n", + " # Attach a Docker-capable remote VM as a compute target.\n", + " RemoteCompute.attach(ws, name = \"cpu-dsvm\", username = \"ninghai\", \n", + " address = \"hai2.eastus2.cloudapp.azure.com:5022\", \n", + " password = \"\")\n", + "except UserErrorException as e:\n", + " print(\"Caught = {}\".format(e.message))\n", + " print(\"Compute config already attached.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure a Spark Docker run on the VM\n", + "Execute the script in the Spark engine inside a Docker container on the VM. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n", + "run_config = RunConfiguration.load(path = project_folder, name = \"cpu-dsvm\")\n", + "\n", + "# set framework to PySpark\n", + "run_config.framework = \"PySpark\"\n", + "\n", + "# Use Docker in the remote VM\n", + "run_config.environment.docker.enabled = True\n", + "\n", + "# Use the MMLSpark CPU based image.\n", + "# https://hub.docker.com/r/microsoft/mmlspark/\n", + "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_MMLSPARK_CPU_IMAGE\n", + "print('base image is:', run_config.environment.docker.base_image)\n", + "\n", + "# use a system-managed environment:\n", + "# provision a new conda environment based on the conda_dependencies.yml file\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "# Prepare the Docker and conda environment automatically when executing for the first time.\n", + "run_config.auto_prepare_environment = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the Experiment\n", + "Submit the script to run in the Spark engine in the Docker container in the remote VM." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "script_run_config = ScriptRunConfig(source_directory = project.project_directory,\n", + " script= 'train-spark.py',\n", + " run_config = run_config)\n", + "run = experiment.submit(script_run_config)\n", + "\n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get the URL of the run history web page\n", + "print(helpers.get_run_history_url(run))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Attach an HDI cluster\n", + "To use an HDI compute target:\n", + " 1. Create a Spark HDInsight cluster in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n", + " 2. Enter the address, username and password below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import HDInsightCompute\n", + "\n", + "try:\n", + " # Attach an HDI cluster as a compute target.\n", + " HDInsightCompute.attach(ws, name = \"myhdi\",\n", + " username = \"ninghai\", \n", + " address = \"sparkhai-ssh.azurehdinsight.net\", \n", + " password = \"\")\n", + "except UserErrorException as e:\n", + " print(\"Caught = {}\".format(e.message))\n", + " print(\"Compute config already attached.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure HDI run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load the runconfig object from the \"myhdi.runconfig\" file generated by the attach operation above.\n", + "run_config = RunConfiguration.load(path = project_folder, name = 'myhdi')\n", + "\n", + "# ask system to prepare the conda environment automatically when executed for the first time\n", + "run_config.auto_prepare_environment = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the script to HDI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "script_run_config = ScriptRunConfig(source_directory = project.project_directory,\n", + " script= 'train-spark.py',\n", + " run_config = run_config)\n", + "run = experiment.submit(script_run_config)\n", + "\n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get the URL of the run history web page\n", + "print(helpers.get_run_history_url(run))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get all metrics logged in the run\n", + "metrics = run.get_metrics()\n", + "print(metrics)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/05.train-in-spark/iris.csv b/00.Getting Started/05.train-in-spark/iris.csv new file 
mode 100644 index 000000000..396653cc9 --- /dev/null +++ b/00.Getting Started/05.train-in-spark/iris.csv @@ -0,0 +1,150 @@ +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica 
+6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica \ No newline at end of file diff --git a/00.Getting Started/05.train-in-spark/train-spark.py b/00.Getting Started/05.train-in-spark/train-spark.py new file mode 100644 index 000000000..6fc7718eb --- /dev/null +++ b/00.Getting Started/05.train-in-spark/train-spark.py @@ -0,0 +1,92 @@ + +import numpy as np +import pyspark +import os +import urllib +import sys + +from pyspark.sql.functions import * +from pyspark.ml.classification import * +from pyspark.ml.evaluation import * +from pyspark.ml.feature import * +from pyspark.sql.types import StructType, StructField +from pyspark.sql.types import DoubleType, IntegerType, StringType + + +from azureml.core.run import Run + +# get the run context for logging +run = Run.get_submitted_run() + +# start Spark session +spark = pyspark.sql.SparkSession.builder.appName('Iris').getOrCreate() + +# print runtime versions +print('****************') +print('Python version: {}'.format(sys.version)) +print('Spark version: {}'.format(spark.version)) +print('****************') + +# load iris.csv into Spark dataframe +schema = StructType([ + StructField("sepal-length", DoubleType()), + StructField("sepal-width", DoubleType()), + StructField("petal-length", DoubleType()), + StructField("petal-width", DoubleType()), + StructField("class", StringType()) +]) + +data = spark.read.csv('iris.csv', header=False, schema=schema) +print("First 10 rows of Iris dataset:") +data.show(10) + +# vectorize all numerical columns into a single feature column +feature_cols = data.columns[:-1] +assembler = pyspark.ml.feature.VectorAssembler( + inputCols=feature_cols, outputCol='features') +data = assembler.transform(data) + +# convert text labels into indices +data = data.select(['features', 'class']) +label_indexer = pyspark.ml.feature.StringIndexer( + inputCol='class', outputCol='label').fit(data) +data = label_indexer.transform(data) + +# only select the features and label column +data = data.select(['features', 'label']) +print("Ready for machine learning") +data.show(10) + +# change regularization rate and you will likely get a different accuracy. 
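+# (note: in Spark ML's LogisticRegression, regParam sets the overall regularization +# strength; with the default elasticNetParam of 0.0 this is a pure L2 penalty, so +# larger values shrink the coefficients more aggressively. Passing a value as the +# first command-line argument, e.g. 0.5, overrides the 0.01 default below.) 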
+reg = 0.01 +# load regularization rate from argument if present +if len(sys.argv) > 1: + reg = float(sys.argv[1]) + +# log regularization rate +run.log("Regularization Rate", reg) + +# use Logistic Regression to train on the training set +train, test = data.randomSplit([0.70, 0.30]) +lr = pyspark.ml.classification.LogisticRegression(regParam=reg) +model = lr.fit(train) + +# predict on the test set +prediction = model.transform(test) +print("Prediction") +prediction.show(10) + +# evaluate the accuracy of the model using the test set +evaluator = pyspark.ml.evaluation.MulticlassClassificationEvaluator( + metricName='accuracy') +accuracy = evaluator.evaluate(prediction) + +print() +print('#####################################') +print('Regularization rate is {}'.format(reg)) +print("Accuracy is {}".format(accuracy)) +print('#####################################') +print() + +# log accuracy +run.log('Accuracy', accuracy) diff --git a/00.Getting Started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb b/00.Getting Started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb new file mode 100644 index 000000000..5f0e460f6 --- /dev/null +++ b/00.Getting Started/07.hyperdrive-with-sklearn/07.hyperdrive-with-sklearn.ipynb @@ -0,0 +1,4018 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 07. HyperDrive with scikit-learn\n", + "- Create Batch AI cluster\n", + "- Train on a single node\n", + "- Set up HyperDrive\n", + "- Parameter sweep with HyperDrive on the Batch AI cluster\n", + "- Monitor parameter sweep runs with the run history widget\n", + "- Find the best model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an Experiment\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'hyperdrive-with-sklearn'\n", + "experiment = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a folder to store the training script." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "script_folder = './samples/hyperdrive-with-sklearn'\n", + "os.makedirs(script_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Provision New Cluster\n", + "Create a new Batch AI cluster using the following Python code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create mlc", + "batchai" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "# choose a name for your cluster\n", + "batchai_cluster_name = ws.name + \"cpu\"\n", + "\n", + "found = False\n", + "# see if this compute target already exists in the workspace\n", + "for ct in ws.compute_targets():\n", + " print(ct.name, ct.type)\n", + " if (ct.name == batchai_cluster_name and ct.type == 'BatchAI'):\n", + " found = True\n", + " print('found compute target. just use it.')\n", + " compute_target = ct\n", + " break\n", + " \n", + "if not found:\n", + " print('creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = True,\n", + " cluster_min_nodes = 1, \n", + " cluster_max_nodes = 4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n", + " \n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + " \n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + " print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ridge Regression with scikit-learn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from shutil import copyfile\n", + "# copy the diabetes_sklearn.py file to the script folder\n", + "copyfile('./diabetes_sklearn.py', os.path.join(script_folder, 'diabetes_sklearn.py'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# review the diabetes_sklearn.py file if you'd like\n", + "with open(os.path.join(script_folder, 'diabetes_sklearn.py'), 'r') as fin:\n", + " print(fin.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an estimator for the sklearn script\n", + "You can use an estimator pattern to run the script. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.estimator import Estimator\n", + "script_params = {\n", + " '--alpha': 0.1\n", + "}\n", + "\n", + "sk_est = Estimator(source_directory = script_folder,\n", + " script_params = script_params,\n", + " compute_target = compute_target,\n", + " entry_script = 'diabetes_sklearn.py',\n", + " conda_packages = ['scikit-learn'])\n", + " #custom_docker_base_image = 'ninghai/azureml:0.3') # use a custom image here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "batchai" + ] + }, + "outputs": [], + "source": [ + "# start the job\n", + "from azureml.core.experiment import Experiment\n", + "\n", + "run = experiment.submit(sk_est)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### View run details\n", + "**IMPORTANT**: please use Chrome to navigate the below URL." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "batchai" + ] + }, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "use notebook widget" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also check the Batch AI cluster and job status using az-cli commands:\n", + "\n", + "```shell\n", + "# check cluster status. You can see how many nodes are running.\n", + "$ az batchai cluster list\n", + "\n", + "# check job status. 
You can see how many jobs are running\n", + "$ az batchai job list\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Now Try a HyperDrive Run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive import *\n", + "\n", + "# parameter space to sweep over\n", + "ps = RandomParameterSampling(\n", + " {\n", + " \"alpha\": uniform(0.0, 1.0)\n", + " }\n", + ")\n", + "\n", + "# early termination policy\n", + "# check every 2 iterations and if the primary metric (mse) falls\n", + "# outside of the range of 10% of the best recorded run so far, terminate it.\n", + "etp = BanditPolicy(slack_factor = 0.1, evaluation_interval = 2)\n", + "\n", + "# HyperDrive run configuration\n", + "hrc = HyperDriveRunConfig(\n", + " estimator = sk_est,\n", + " hyperparameter_sampling = ps,\n", + " policy = etp,\n", + " # metric to watch (for early termination)\n", + " primary_metric_name = 'mse',\n", + " # the goal is to minimize the primary metric\n", + " primary_metric_goal = PrimaryMetricGoal.MINIMIZE,\n", + " max_total_runs = 20,\n", + " max_concurrent_runs = 4,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hyperdrive run", + "batchai" + ] + }, + "outputs": [], + "source": [ + "# Start the HyperDrive run\n", + "\n", + "hr = experiment.submit(hrc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use a widget to show runs\n", + "Runs will automatically start to show in the following widget once rendered. You can keep the Notebook open and watch them \"grow\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "use notebook widget" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(hr).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note**: This is a sample image with 200 runs. Your result might look different.\n", + "![img](../images/hyperdrive-sklearn.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# check cluster status, pay attention to the # of running nodes\n", + "# !az batchai cluster list -o table\n", + "\n", + "# check the Batch AI job queue. Notice the Job name is the run history Id. Pay attention to the State of the job.\n", + "# !az batchai job list -o table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find best run\n", + "Please wait until all HyperDrive runs finish before running the cells below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "hr.get_status()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history", + "get metrics" + ] + }, + "outputs": [], + "source": [ + "from tqdm import tqdm\n", + "\n", + "runs = {}\n", + "\n", + "for r in tqdm(hr.get_children()):\n", + " metrics = r.get_metrics()\n", + " if ('mse' in metrics.keys()):\n", + " runs[r.id] = metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "best_run_id = min(runs, key = lambda k: runs[k]['mse'])\n", + "best_run = runs[best_run_id]\n", + "print('Best Run: alpha = {0:.4f}, MSE = {1:.4f}'.format(best_run['alpha'], best_run['mse']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot the best run [Optional] \n", + "Note you will need to install `matplotlib` for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get metrics of alpha and mse for all runs\n", + "metrics = np.array([[runs[r]['alpha'], runs[r]['mse']] for r in runs])\n", + "\n", + "# sort the metrics by alpha values\n", + "metrics = np.array(sorted(metrics, key = lambda m: m[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.title('MSE over alpha', fontsize = 16)\n", + "\n", + "plt.plot(metrics[:,0], metrics[:,1], 'r--')\n", + "plt.plot(metrics[:,0], metrics[:,1], 'bo')\n", + "\n", + "plt.xlabel('alpha', fontsize = 14)\n", + "plt.ylabel('mean squared error', fontsize = 14)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "7cf278f65a36435fb03137ca56bcd263": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "83e0767b6c3a41a2833d0f8fcf690c72": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "aa3181a75ca34d729b0dce89e779ec0e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_train_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowHyperDriveRunsView", + "layout": "IPY_MODEL_7cf278f65a36435fb03137ca56bcd263", + "value": [ + { + "run_id": "hyperdrive-sklearn-diabetes_1526126138942", + "status": "Running", + "workbench_run_details_uri": 
"https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-e2e-rg%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2euapws/projects/hyperdrive-sklearn-diabetes/run-history/run-details/hyperdrive-sklearn-diabetes_1526126138942?type=HyperDrive" + }, + [ + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.411237364035508", + "created_time": "2018-05-12 12:12:51.261530+00:00", + "created_time_dt": "2018-05-12T12:12:51.261530", + "duration": "0:00:12", + "end_time": "2018-05-12 12:13:03.803382+00:00", + "hyperdrive_id": "8382", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8382_0beb029b", + "metric": 3295.672, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.409690853942327", + "created_time": "2018-05-12 13:13:23.950880+00:00", + "created_time_dt": "2018-05-12T13:13:23.950880", + "duration": "0:00:28", + "end_time": "2018-05-12 13:13:52.707230+00:00", + "hyperdrive_id": "8479", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8479_5832d5b1", + "metric": 3295.6743, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.417026424561565", + "created_time": "2018-05-12 12:08:27.465571+00:00", + "created_time_dt": "2018-05-12T12:08:27.465571", + "duration": "0:00:12", + "end_time": "2018-05-12 12:08:39.848811+00:00", + "hyperdrive_id": "8370", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8370_c9260eec", + "metric": 3295.6834, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.417442688875759", + "created_time": "2018-05-12 13:03:26.849626+00:00", + "created_time_dt": "2018-05-12T13:03:26.849626", + "duration": "0:00:14", + "end_time": "2018-05-12 13:03:41.499999+00:00", + "hyperdrive_id": "8468", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8468_5e92ff25", + "metric": 3295.6854, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.40612547022086", + "created_time": "2018-05-12 12:40:16.219170+00:00", + "created_time_dt": "2018-05-12T12:40:16.219170", + "duration": "0:00:12", + "end_time": "2018-05-12 12:40:29.063299+00:00", + "hyperdrive_id": "8436", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8436_8d7667d7", + "metric": 3295.6882, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.404098690541524", + "created_time": "2018-05-12 12:28:12.532204+00:00", + "created_time_dt": "2018-05-12T12:28:12.532204", + "duration": "0:00:12", + "end_time": "2018-05-12 12:28:25.123133+00:00", + "hyperdrive_id": "8415", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8415_cd663398", + "metric": 3295.7016, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.422501486914154", + "created_time": "2018-05-12 12:06:04.978496+00:00", + "created_time_dt": "2018-05-12T12:06:04.978496", + "duration": "0:00:13", + "end_time": "2018-05-12 12:06:18.355669+00:00", + "hyperdrive_id": "8363", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8363_b1db4981", + "metric": 3295.7227, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.423906941816355", + "created_time": "2018-05-12 12:27:06.723050+00:00", + "created_time_dt": 
"2018-05-12T12:27:06.723050", + "duration": "0:00:14", + "end_time": "2018-05-12 12:27:20.746252+00:00", + "hyperdrive_id": "8414", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8414_8f74b802", + "metric": 3295.7372, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.431026531225767", + "created_time": "2018-05-12 12:25:04.908855+00:00", + "created_time_dt": "2018-05-12T12:25:04.908855", + "duration": "0:00:14", + "end_time": "2018-05-12 12:25:19.602114+00:00", + "hyperdrive_id": "8409", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8409_5345e6b2", + "metric": 3295.8375, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.393032827195967", + "created_time": "2018-05-12 12:07:13.020312+00:00", + "created_time_dt": "2018-05-12T12:07:13.020312", + "duration": "0:00:34", + "end_time": "2018-05-12 12:07:47.944409+00:00", + "hyperdrive_id": "8367", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8367_c39107f9", + "metric": 3295.8465, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.436567624426189", + "created_time": "2018-05-12 13:04:49.897871+00:00", + "created_time_dt": "2018-05-12T13:04:49.897871", + "duration": "0:00:14", + "end_time": "2018-05-12 13:05:04.491673+00:00", + "hyperdrive_id": "8470", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8470_a73b2d7b", + "metric": 3295.9462, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.383629052679639", + "created_time": "2018-05-12 12:12:37.531581+00:00", + "created_time_dt": "2018-05-12T12:12:37.531581", + "duration": "0:00:12", + "end_time": "2018-05-12 12:12:50.210199+00:00", + "hyperdrive_id": "8381", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8381_b638e983", + "metric": 3296.0679, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.382111348518354", + "created_time": "2018-05-12 11:55:54.563179+00:00", + "created_time_dt": "2018-05-12T11:55:54.563179", + "duration": "0:00:13", + "end_time": "2018-05-12 11:56:07.888796+00:00", + "hyperdrive_id": "8327", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8327_a045606f", + "metric": 3296.1124, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.381187002593777", + "created_time": "2018-05-12 12:09:12.703228+00:00", + "created_time_dt": "2018-05-12T12:09:12.703228", + "duration": "0:00:17", + "end_time": "2018-05-12 12:09:29.741640+00:00", + "hyperdrive_id": "8373", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8373_053f25c6", + "metric": 3296.1406, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.379109970937149", + "created_time": "2018-05-12 13:02:00.253981+00:00", + "created_time_dt": "2018-05-12T13:02:00.253981", + "duration": "0:00:12", + "end_time": "2018-05-12 13:02:12.909525+00:00", + "hyperdrive_id": "8466", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8466_a379787d", + "metric": 3296.2076, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.448204115660274", + "created_time": "2018-05-12 13:44:11.787530+00:00", + "created_time_dt": "2018-05-12T13:44:11.787530", + "duration": "0:00:13", + "end_time": 
"2018-05-12 13:44:25.111437+00:00", + "hyperdrive_id": "8514", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8514_3f6ef25a", + "metric": 3296.2587, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.448713062576673", + "created_time": "2018-05-12 12:49:55.612577+00:00", + "created_time_dt": "2018-05-12T12:49:55.612577", + "duration": "0:00:30", + "end_time": "2018-05-12 12:50:26.163813+00:00", + "hyperdrive_id": "8449", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8449_d0363c5b", + "metric": 3296.275, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.451812447966424", + "created_time": "2018-05-12 13:46:02.887675+00:00", + "created_time_dt": "2018-05-12T13:46:02.887675", + "duration": "0:00:13", + "end_time": "2018-05-12 13:46:15.984786+00:00", + "hyperdrive_id": "8515", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8515_131388fa", + "metric": 3296.3782, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.454660892639694", + "created_time": "2018-05-12 12:08:27.411796+00:00", + "created_time_dt": "2018-05-12T12:08:27.411796", + "duration": "0:00:45", + "end_time": "2018-05-12 12:09:12.989012+00:00", + "hyperdrive_id": "8372", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8372_94748f49", + "metric": 3296.4798, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.45772812795942", + "created_time": "2018-05-12 13:15:05.575428+00:00", + "created_time_dt": "2018-05-12T13:15:05.575428", + "duration": "0:00:28", + "end_time": "2018-05-12 13:15:34.181749+00:00", + "hyperdrive_id": "8481", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8481_7b4d7aae", + "metric": 3296.5964, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.459845797045424", + "created_time": "2018-05-12 12:14:11.140060+00:00", + "created_time_dt": "2018-05-12T12:14:11.140060", + "duration": "0:00:13", + "end_time": "2018-05-12 12:14:24.996486+00:00", + "hyperdrive_id": "8386", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8386_f396881b", + "metric": 3296.6811, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.366571747107722", + "created_time": "2018-05-12 12:08:27.253377+00:00", + "created_time_dt": "2018-05-12T12:08:27.253377", + "duration": "0:00:29", + "end_time": "2018-05-12 12:08:56.307309+00:00", + "hyperdrive_id": "8371", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8371_cd392eb6", + "metric": 3296.7127, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.462046659150171", + "created_time": "2018-05-12 12:42:50.088268+00:00", + "created_time_dt": "2018-05-12T12:42:50.088268", + "duration": "0:00:29", + "end_time": "2018-05-12 12:43:19.725900+00:00", + "hyperdrive_id": "8440", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8440_2809b87a", + "metric": 3296.7729, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.466061327723322", + "created_time": "2018-05-12 13:32:48.214641+00:00", + "created_time_dt": "2018-05-12T13:32:48.214641", + "duration": "0:00:33", + "end_time": "2018-05-12 13:33:21.400760+00:00", + "hyperdrive_id": "8502", + "id": 
"hyperdrive-sklearn-diabetes_1526126138942_1056_8502_f26e1d7a", + "metric": 3296.9498, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.360764179714059", + "created_time": "2018-05-12 12:52:59.773606+00:00", + "created_time_dt": "2018-05-12T12:52:59.773606", + "duration": "0:00:29", + "end_time": "2018-05-12 12:53:29.269383+00:00", + "hyperdrive_id": "8454", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8454_d048fd67", + "metric": 3297.0072, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.357261239177847", + "created_time": "2018-05-12 12:00:06.195143+00:00", + "created_time_dt": "2018-05-12T12:00:06.195143", + "duration": "0:00:15", + "end_time": "2018-05-12 12:00:21.894647+00:00", + "hyperdrive_id": "8343", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8343_4b4ee27d", + "metric": 3297.2039, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.357032645286086", + "created_time": "2018-05-12 12:29:17.822255+00:00", + "created_time_dt": "2018-05-12T12:29:17.822255", + "duration": "0:00:13", + "end_time": "2018-05-12 12:29:31.531549+00:00", + "hyperdrive_id": "8418", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8418_17853174", + "metric": 3297.2173, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.478523449448336", + "created_time": "2018-05-12 12:11:12.128813+00:00", + "created_time_dt": "2018-05-12T12:11:12.128813", + "duration": "0:00:12", + "end_time": "2018-05-12 12:11:24.470896+00:00", + "hyperdrive_id": "8378", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8378_137b1616", + "metric": 3297.5751, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.478800785224639", + "created_time": "2018-05-12 13:48:14.367164+00:00", + "created_time_dt": "2018-05-12T13:48:14.367164", + "duration": "0:00:32", + "end_time": "2018-05-12 13:48:46.834752+00:00", + "hyperdrive_id": "8518", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8518_729c4598", + "metric": 3297.5903, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.344791270146422", + "created_time": "2018-05-12 13:00:23.018949+00:00", + "created_time_dt": "2018-05-12T13:00:23.018949", + "duration": "0:00:31", + "end_time": "2018-05-12 13:00:54.639071+00:00", + "hyperdrive_id": "8464", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8464_7857e9c2", + "metric": 3298.0249, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.338103667119868", + "created_time": "2018-05-12 13:21:55.155823+00:00", + "created_time_dt": "2018-05-12T13:21:55.155823", + "duration": "0:00:30", + "end_time": "2018-05-12 13:22:26.141154+00:00", + "hyperdrive_id": "8489", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8489_b7ffca04", + "metric": 3298.5454, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.332097037995421", + "created_time": "2018-05-12 13:42:09.096805+00:00", + "created_time_dt": "2018-05-12T13:42:09.096805", + "duration": "0:00:18", + "end_time": "2018-05-12 13:42:27.483311+00:00", + "hyperdrive_id": "8511", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8511_ec3cc7c9", + 
"metric": 3299.0623, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.331939228048221", + "created_time": "2018-05-12 12:45:34.049683+00:00", + "created_time_dt": "2018-05-12T12:45:34.049683", + "duration": "0:00:14", + "end_time": "2018-05-12 12:45:48.102359+00:00", + "hyperdrive_id": "8444", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8444_46d2f35f", + "metric": 3299.0766, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.505093423597719", + "created_time": "2018-05-12 13:18:18.551188+00:00", + "created_time_dt": "2018-05-12T13:18:18.551188", + "duration": "0:00:13", + "end_time": "2018-05-12 13:18:31.915661+00:00", + "hyperdrive_id": "8486", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8486_c437b7f7", + "metric": 3299.2713, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.506815861512464", + "created_time": "2018-05-12 12:27:06.875070+00:00", + "created_time_dt": "2018-05-12T12:27:06.875070", + "duration": "0:00:12", + "end_time": "2018-05-12 12:27:19.538922+00:00", + "hyperdrive_id": "8413", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8413_a969f346", + "metric": 3299.3974, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.509599014162797", + "created_time": "2018-05-12 12:02:10.749542+00:00", + "created_time_dt": "2018-05-12T12:02:10.749542", + "duration": "0:00:22", + "end_time": "2018-05-12 12:02:32.904491+00:00", + "hyperdrive_id": "8351", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8351_08acc5b3", + "metric": 3299.605, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.511754766725497", + "created_time": "2018-05-12 12:31:33.630006+00:00", + "created_time_dt": "2018-05-12T12:31:33.630006", + "duration": "0:00:13", + "end_time": "2018-05-12 12:31:46.932013+00:00", + "hyperdrive_id": "8421", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8421_d66288a6", + "metric": 3299.7693, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.517357778454532", + "created_time": "2018-05-12 13:32:47.999414+00:00", + "created_time_dt": "2018-05-12T13:32:47.999414", + "duration": "0:00:16", + "end_time": "2018-05-12 13:33:04.221248+00:00", + "hyperdrive_id": "8501", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8501_f6988ef5", + "metric": 3300.2095, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.319331797643165", + "created_time": "2018-05-12 13:23:42.824307+00:00", + "created_time_dt": "2018-05-12T13:23:42.824307", + "duration": "0:00:14", + "end_time": "2018-05-12 13:23:56.961000+00:00", + "hyperdrive_id": "8491", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8491_f97ca369", + "metric": 3300.3225, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.319074585293429", + "created_time": "2018-05-12 13:32:34.003659+00:00", + "created_time_dt": "2018-05-12T13:32:34.003659", + "duration": "0:00:14", + "end_time": "2018-05-12 13:32:48.094689+00:00", + "hyperdrive_id": "8500", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8500_d491060c", + "metric": 3300.3502, + "start_time": "None", + "status": "Completed" + }, + 
{ + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.318218570599629", + "created_time": "2018-05-12 12:05:05.927100+00:00", + "created_time_dt": "2018-05-12T12:05:05.927100", + "duration": "0:00:18", + "end_time": "2018-05-12 12:05:23.961623+00:00", + "hyperdrive_id": "8360", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8360_8d3ad717", + "metric": 3300.4432, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.310459620980281", + "created_time": "2018-05-12 13:01:59.639205+00:00", + "created_time_dt": "2018-05-12T13:01:59.639205", + "duration": "0:00:30", + "end_time": "2018-05-12 13:02:29.644690+00:00", + "hyperdrive_id": "8465", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8465_a642fe78", + "metric": 3301.3332, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.308825721895932", + "created_time": "2018-05-12 12:26:00.849703+00:00", + "created_time_dt": "2018-05-12T12:26:00.849703", + "duration": "0:00:14", + "end_time": "2018-05-12 12:26:14.900534+00:00", + "hyperdrive_id": "8411", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8411_59c695f4", + "metric": 3301.5318, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.306982685343673", + "created_time": "2018-05-12 13:25:29.647713+00:00", + "created_time_dt": "2018-05-12T13:25:29.647713", + "duration": "0:00:12", + "end_time": "2018-05-12 13:25:42.574230+00:00", + "hyperdrive_id": "8494", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8494_5b797969", + "metric": 3301.7606, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.306861470957928", + "created_time": "2018-05-12 12:04:02.669871+00:00", + "created_time_dt": "2018-05-12T12:04:02.669871", + "duration": "0:00:11", + "end_time": "2018-05-12 12:04:14.471005+00:00", + "hyperdrive_id": "8357", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8357_d468e6ae", + "metric": 3301.7758, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.539137316483953", + "created_time": "2018-05-12 13:15:05.900228+00:00", + "created_time_dt": "2018-05-12T13:15:05.900228", + "duration": "0:00:43", + "end_time": "2018-05-12 13:15:49.159519+00:00", + "hyperdrive_id": "8482", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8482_d29d0b57", + "metric": 3302.0981, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.54004647672626", + "created_time": "2018-05-12 12:32:49.211373+00:00", + "created_time_dt": "2018-05-12T12:32:49.211373", + "duration": "0:00:14", + "end_time": "2018-05-12 12:33:03.460382+00:00", + "hyperdrive_id": "8423", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8423_88f12348", + "metric": 3302.1828, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.543570780620328", + "created_time": "2018-05-12 12:41:33.851103+00:00", + "created_time_dt": "2018-05-12T12:41:33.851103", + "duration": "0:00:13", + "end_time": "2018-05-12 12:41:47.611145+00:00", + "hyperdrive_id": "8438", + "id": "hyperdrive-sklearn-diabetes_1526126138942_1056_8438_9402e974", + "metric": 3302.5156, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 
+ [Saved widget output stripped: a run-history table for the hyperdrive-sklearn-diabetes alpha sweep (per-run arguments to diabetes_sklearn.py, created/end timestamps, duration, hyperdrive id, status, and the logged mse metric), followed by Plotly chart data for the "mse" scatter series and the stepped "mse_min" best-so-far line over run ids 8327-8526.]
"8392", + "8393", + "8394", + "8395", + "8396", + "8397", + "8398", + "8399", + "8400", + "8401", + "8402", + "8403", + "8404", + "8405", + "8406", + "8407", + "8408", + "8409", + "8410", + "8411", + "8412", + "8413", + "8414", + "8415", + "8416", + "8417", + "8418", + "8419", + "8420", + "8421", + "8422", + "8423", + "8424", + "8425", + "8426", + "8427", + "8428", + "8429", + "8430", + "8431", + "8432", + "8433", + "8434", + "8435", + "8436", + "8437", + "8438", + "8439", + "8440", + "8441", + "8442", + "8443", + "8444", + "8445", + "8446", + "8447", + "8448", + "8449", + "8450", + "8451", + "8452", + "8453", + "8454", + "8455", + "8456", + "8457", + "8458", + "8459", + "8460", + "8461", + "8462", + "8463", + "8464", + "8465", + "8466", + "8467", + "8468", + "8469", + "8470", + "8471", + "8472", + "8473", + "8474", + "8475", + "8476", + "8477", + "8478", + "8479", + "8480", + "8481", + "8482", + "8483", + "8484", + "8485", + "8486", + "8487", + "8488", + "8489", + "8490", + "8491", + "8492", + "8493", + "8494", + "8495", + "8496", + "8497", + "8498", + "8499", + "8500", + "8501", + "8502", + "8503", + "8504", + "8505", + "8506", + "8507", + "8508", + "8509", + "8510", + "8511", + "8512", + "8513", + "8514", + "8515", + "8516", + "8517", + "8518", + "8519", + "8520", + "8521", + "8522", + "8523", + "8524", + "8525", + "8526" + ] + } + ], + "showLegend": false, + "title": "HyperDrive Run Primary Metric : mse" + } + ] + } + }, + "f5ccd42f25e8402bbcccf511b3c6e08f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_train_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowHyperDriveRunsView", + "layout": "IPY_MODEL_83e0767b6c3a41a2833d0f8fcf690c72", + "value": [ + { + "run_id": "hyperdrive-sklearn-diabetes_1526099364301", + "status": "Running", + "workbench_run_details_uri": "https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-e2e-rg%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2euapws/projects/hyperdrive-sklearn-diabetes/run-history/run-details/hyperdrive-sklearn-diabetes_1526099364301?type=HyperDrive" + }, + [ + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.393891676993532", + "created_time": "2018-05-12 04:34:15.248693+00:00", + "created_time_dt": "2018-05-12T04:34:15.248693", + "duration": "0:00:19", + "end_time": "2018-05-12 04:34:34.352899+00:00", + "hyperdrive_id": "8324", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8324_3d18af85", + "metric": 3295.8309, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.375901006177515", + "created_time": "2018-05-12 04:33:51.986890+00:00", + "created_time_dt": "2018-05-12T04:33:51.986890", + "duration": "0:00:25", + "end_time": "2018-05-12 04:34:17.199718+00:00", + "hyperdrive_id": "8323", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8323_ef8a489b", + "metric": 3296.3202, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.358260413565936", + "created_time": "2018-05-12 04:29:36.537978+00:00", + "created_time_dt": "2018-05-12T04:29:36.537978", + "duration": "0:00:30", + "end_time": "2018-05-12 04:30:07.443012+00:00", + "hyperdrive_id": "8307", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8307_6bb93d21", + "metric": 3297.1463, + 
"start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.505489715550404", + "created_time": "2018-05-12 04:33:25.471155+00:00", + "created_time_dt": "2018-05-12T04:33:25.471155", + "duration": "0:00:33", + "end_time": "2018-05-12 04:33:59.378295+00:00", + "hyperdrive_id": "8322", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8322_5151e960", + "metric": 3299.3001, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.513294267924078", + "created_time": "2018-05-12 04:31:23.890814+00:00", + "created_time_dt": "2018-05-12T04:31:23.890814", + "duration": "0:00:38", + "end_time": "2018-05-12 04:32:02.172094+00:00", + "hyperdrive_id": "8315", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8315_a8d0df53", + "metric": 3299.8883, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.320051010308542", + "created_time": "2018-05-12 04:32:09.433779+00:00", + "created_time_dt": "2018-05-12T04:32:09.433779", + "duration": "0:00:43", + "end_time": "2018-05-12 04:32:53.026804+00:00", + "hyperdrive_id": "8318", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8318_9c385d92", + "metric": 3300.2455, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.658960235847498", + "created_time": "2018-05-12 04:32:47.239418+00:00", + "created_time_dt": "2018-05-12T04:32:47.239418", + "duration": "0:00:55", + "end_time": "2018-05-12 04:33:43.185284+00:00", + "hyperdrive_id": "8320", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8320_2e273b36", + "metric": 3316.6481, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.222378138720275", + "created_time": "2018-05-12 04:31:50.981871+00:00", + "created_time_dt": "2018-05-12T04:31:50.981871", + "duration": "0:00:28", + "end_time": "2018-05-12 04:32:19.478985+00:00", + "hyperdrive_id": "8317", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8317_7551a3d3", + "metric": 3318.4718, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.196888642456712", + "created_time": "2018-05-12 04:31:05.402230+00:00", + "created_time_dt": "2018-05-12T04:31:05.402230", + "duration": "0:00:23", + "end_time": "2018-05-12 04:31:29.205264+00:00", + "hyperdrive_id": "8314", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8314_24c262a7", + "metric": 3326.3371, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.189096447548258", + "created_time": "2018-05-12 04:34:39.460591+00:00", + "created_time_dt": "2018-05-12T04:34:39.460591", + "duration": "0:00:29", + "end_time": "2018-05-12 04:35:08.900171+00:00", + "hyperdrive_id": "8326", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8326_9e14bae2", + "metric": 3329.0536, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.150066832506793", + "created_time": "2018-05-12 04:30:12.926261+00:00", + "created_time_dt": "2018-05-12T04:30:12.926261", + "duration": "0:00:42", + "end_time": "2018-05-12 04:30:55.921563+00:00", + "hyperdrive_id": "8311", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8311_eaaf9dd9", + "metric": 3345.1184, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": 
"diabetes_sklearn.py --alpha 0.1 --alpha 0.853877240746265", + "created_time": "2018-05-12 04:34:15.379264+00:00", + "created_time_dt": "2018-05-12T04:34:15.379264", + "duration": "0:00:36", + "end_time": "2018-05-12 04:34:51.753259+00:00", + "hyperdrive_id": "8325", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8325_76cf6d75", + "metric": 3350.1062, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.864495102907021", + "created_time": "2018-05-12 04:29:36.618937+00:00", + "created_time_dt": "2018-05-12T04:29:36.618937", + "duration": "0:00:47", + "end_time": "2018-05-12 04:30:23.776039+00:00", + "hyperdrive_id": "8309", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8309_950b505d", + "metric": 3352.1487, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.102729222141871", + "created_time": "2018-05-12 04:29:36.417036+00:00", + "created_time_dt": "2018-05-12T04:29:36.417036", + "duration": "0:00:13", + "end_time": "2018-05-12 04:29:50.008716+00:00", + "hyperdrive_id": "8308", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8308_9e19788b", + "metric": 3370.925, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.98813448350046", + "created_time": "2018-05-12 04:29:36.482277+00:00", + "created_time_dt": "2018-05-12T04:29:36.482277", + "duration": "0:01:02", + "end_time": "2018-05-12 04:30:39.202800+00:00", + "hyperdrive_id": "8310", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8310_b7a0d869", + "metric": 3376.9628, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.992686382474524", + "created_time": "2018-05-12 04:32:23.342063+00:00", + "created_time_dt": "2018-05-12T04:32:23.342063", + "duration": "0:00:45", + "end_time": "2018-05-12 04:33:08.616066+00:00", + "hyperdrive_id": "8319", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8319_7d385753", + "metric": 3377.9056, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0720166631869791", + "created_time": "2018-05-12 04:33:00.471785+00:00", + "created_time_dt": "2018-05-12T04:33:00.471785", + "duration": "0:00:24", + "end_time": "2018-05-12 04:33:25.342289+00:00", + "hyperdrive_id": "8321", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8321_93092a4f", + "metric": 3391.9024, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0399458028478119", + "created_time": "2018-05-12 04:31:37.490004+00:00", + "created_time_dt": "2018-05-12T04:31:37.490004", + "duration": "0:00:58", + "end_time": "2018-05-12 04:32:36.227261+00:00", + "hyperdrive_id": "8316", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8316_47adbb4d", + "metric": 3417.0157, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0371855776131041", + "created_time": "2018-05-12 04:30:40.022059+00:00", + "created_time_dt": "2018-05-12T04:30:40.022059", + "duration": "0:00:31", + "end_time": "2018-05-12 04:31:11.265877+00:00", + "hyperdrive_id": "8313", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8313_a2cf1a87", + "metric": 3419.2355, + "start_time": "None", + "status": "Completed" + }, + { + "arguments": "diabetes_sklearn.py --alpha 0.1 --alpha 0.0108521659414031", + 
"created_time": "2018-05-12 04:30:26.126697+00:00", + "created_time_dt": "2018-05-12T04:30:26.126697", + "duration": "0:01:19", + "end_time": "2018-05-12 04:31:45.385829+00:00", + "hyperdrive_id": "8312", + "id": "hyperdrive-sklearn-diabetes_1526099364301_1055_8312_333b9d43", + "metric": 3435.7832, + "start_time": "None", + "status": "Completed" + } + ], + { + "categories": [ + "8307", + "8308", + "8309", + "8310", + "8311", + "8312", + "8313", + "8314", + "8315", + "8316", + "8317", + "8318", + "8319", + "8320", + "8321", + "8322", + "8323", + "8324", + "8325", + "8326" + ], + "metricName": "mse", + "series": [ + { + "mode": "markers", + "name": "mse", + "stepped": false, + "type": "scatter", + "uid": "0d668b", + "x": [ + "8307", + "8308", + "8309", + "8310", + "8311", + "8312", + "8313", + "8314", + "8315", + "8316", + "8317", + "8318", + "8319", + "8320", + "8321", + "8322", + "8323", + "8324", + "8325", + "8326" + ], + "y": [ + 3297.146322567479, + 3370.9250492845417, + 3352.1487032874497, + 3376.962795304554, + 3345.1183624558485, + 3435.783246565139, + 3419.235515804575, + 3326.3371118238074, + 3299.888294102396, + 3417.015692446415, + 3318.471799408107, + 3300.2455334502383, + 3377.9056290478743, + 3316.6480785229305, + 3391.902383224928, + 3299.300119043289, + 3296.320211066935, + 3295.8308612858723, + 3350.1062329850233, + 3329.0535888350505 + ] + }, + { + "line": { + "shape": "hv" + }, + "mode": "lines", + "name": "mse_min", + "stepped": true, + "type": "scatter", + "uid": "686b96", + "x": [ + "8307", + "8308", + "8309", + "8310", + "8311", + "8312", + "8313", + "8314", + "8315", + "8316", + "8317", + "8318", + "8319", + "8320", + "8321", + "8322", + "8323", + "8324", + "8325", + "8326" + ], + "y": [ + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3297.146322567479, + 3296.320211066935, + 3295.8308612858723, + 3295.8308612858723, + 3295.8308612858723 + ] + } + ], + "showLegend": false, + "title": "HyperDrive Run Primary Metric : mse" + } + ] + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/07.hyperdrive-with-sklearn/diabetes_sklearn.py b/00.Getting Started/07.hyperdrive-with-sklearn/diabetes_sklearn.py new file mode 100644 index 000000000..3fb59770b --- /dev/null +++ b/00.Getting Started/07.hyperdrive-with-sklearn/diabetes_sklearn.py @@ -0,0 +1,52 @@ +from sklearn.datasets import load_diabetes +from sklearn.linear_model import Ridge +from sklearn.metrics import mean_squared_error +from sklearn.model_selection import train_test_split +from sklearn.externals import joblib + +import os +import argparse + +# Import Run from azureml.core, +from azureml.core.run import Run + +parser = argparse.ArgumentParser() +parser.add_argument('--alpha', type=float, dest='alpha', + default=0.5, help='regularization strength') +args = parser.parse_args() + +# Get handle of current run for logging and history purposes +run = Run.get_submitted_run() + +X, y = load_diabetes(return_X_y=True) + +columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'] + +x_train, x_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0) +data = {"train": {"x": x_train, "y": y_train}, + "test": {"x": x_test, "y": y_test}} 
+ +alpha = args.alpha +print('alpha value is:', alpha) + +reg = Ridge(alpha=alpha) +reg.fit(data["train"]["x"], data["train"]["y"]) + +print('Ridge model fitted.') + +preds = reg.predict(data["test"]["x"]) +mse = mean_squared_error(preds, data["test"]["y"]) + +# Log metrics +run.log("alpha", alpha) +run.log("mse", mse) + +os.makedirs('./outputs', exist_ok=True) +model_file_name = "model.pkl" + +# Save model into the outputs folder so it is captured as part of the run history +joblib.dump(value=reg, filename=os.path.join('outputs', model_file_name)) + +print('Mean Squared Error is:', mse) diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb b/00.Getting Started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb new file mode 100644 index 000000000..619643135 --- /dev/null +++ b/00.Getting Started/08.hyperdrive-with-TensorFlow/08.hyperdrive-with-TensorFlow.ipynb @@ -0,0 +1,3440 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training and Hyperparameter Tuning of a TensorFlow Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial, we demonstrate how to use the Azure ML Python SDK to train a Convolutional Neural Network (CNN) in TensorFlow to perform handwritten digit recognition on the popular MNIST dataset. We will also show how to perform hyperparameter tuning of the model using AML's HyperDrive service. \n", + "\n", + "We will cover the following concepts:\n", + "* Create a Batch AI GPU cluster\n", + "* (To do): DataStore\n", + "* Train a TensorFlow model on a single node\n", + "* Logging metrics to Run History\n", + "* Set up a hyperparameter sweep with HyperDrive\n", + "* Select the best model for download" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't. In addition, to run through this notebook, you will need to install a few additional packages by running `pip install pillow tensorflow matplotlib pandas tqdm`.\n", + "\n", + "### Authorize Hyperdrive Service Principal\n", + "\n", + "The Hyperdrive service is in preview, so you need to explicitly grant it permissions. In the Azure portal, add `vienna-test-westus` as a `Contributor` to your resource group. Alternatively, you can do this from azure-cli:\n", + "```sh\n", + "# find the ARM id of your resource group. Copy into memory.\n", + "$ az group show -n -o json\n", + "\n", + "# check if https://vienna-test-westus-cluster.sp.azureml.net is a Contributor.\n", + "$ az role assignment list --scope -o table\n", + "\n", + "# if not, add it. you will need to be a resource group owner to do this.\n", + "$ az role assignment create --role Contributor --scope --assignee https://vienna-test-westus-cluster.sp.azureml.net\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Set Up a Workspace\n", + "The Workspace is the top-level Azure resource for Azure ML services.\n",
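+ "\n", + "`Workspace.from_config()` (used below) loads the workspace from the configuration persisted by the configuration notebook. As a minimal alternative sketch, assuming `Workspace.get` is available in your SDK build, you can also retrieve an existing workspace directly. The names here are placeholders, not values used elsewhere in this tutorial:\n", + "```python\n", + "from azureml.core import Workspace\n", + "\n", + "# placeholder names -- replace with your own values\n", + "ws = Workspace.get(name = 'myworkspace',\n", + "                   subscription_id = '<subscription-id>',\n", + "                   resource_group = '<resource-group>')\n", + "```"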
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create An Experiment\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records, which can include run metrics and output artifacts from your experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'hyperdrive-with-tf'\n", + "experiment = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a folder to store the training script." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "script_folder = './samples/hyperdrive-with-tf'\n", + "os.makedirs(script_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Provision a New Batch AI Cluster\n", + "Training machine learning models is often a compute-intensive process. Azure's [Batch AI](https://docs.microsoft.com/en-us/azure/batch-ai/overview) service allows data scientists to leverage the power of compute clusters of CPU or GPU-enabled VMs for training their models. Using the Python SDK, we can easily provision a Batch AI cluster with the specifications we want." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create mlc", + "batchai" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "# choose a name for your cluster\n", + "batchai_cluster_name = ws.name + \"gpu\"\n", + "\n", + "found = False\n", + "# see if this compute target already exists in the workspace\n", + "for ct in ws.compute_targets():\n", + "    print(ct.name, ct.type)\n", + "    if ct.name == batchai_cluster_name and type(ct) is BatchAiCompute:\n", + "        found = True\n", + "        print('found compute target. just use it.')\n", + "        compute_target = ct\n", + "        break\n", + "    \n", + "if not found:\n", + "    print('creating a new compute target...')\n", + "    provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\", # NC6 is GPU-enabled\n", + "                                                                #vm_priority = 'lowpriority', # optional\n", + "                                                                autoscale_enabled = True,\n", + "                                                                cluster_min_nodes = 1, \n", + "                                                                cluster_max_nodes = 4)\n", + "\n", + "    # create the cluster\n", + "    compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n", + "    \n", + "    # can poll for a minimum number of nodes and for a specific timeout.
\n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + " \n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + " print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we specify the following parameters for the `provisioning_config`:\n", + "* `vm_size`: the family and size of the VM to use. For this tutorial we want to leverage GPU nodes, so we specify the `STANDARD_NC6` VM, which has one NVIDIA K80 GPU\n", + "* `vm_priority`: `'lowpriority'` or `'dedicated'`\n", + "* `autoscale_enabled`: with autoscaling set to `True`, Batch AI will automatically resize the cluster based on the demands of your workload. Default is `False`, will create a cluster with a fixed # of nodes\n", + "* `cluster_min_nodes`: minimum number of VMs for autoscaling\n", + "* `cluster_max_nodes`: maximum number of VMs for autoscaling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Train TensorFlow MNIST\n", + "Now let's train a CNN on the MNIST dataset for predicting handwritten digits. The training script `tf_mnist_train.py` is adapted from TensorFlow's [MNIST](#https://www.tensorflow.org/versions/r1.4/get_started/mnist/pros) tutorial. The changes to the original on concerned logging some metrics about the training run to the AML run history. See the adapted file here: [tf_mnist_train.py](tf_mnist_train.py) -- search for 'run_logger' to find the added lines of code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from shutil import copyfile\n", + "\n", + "training_script = 'tf_mnist_train.py'\n", + "# copy the mnist_tf.py file to the project folder\n", + "copyfile(training_script, os.path.join(script_folder, training_script))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# take a look at the training script\n", + "!more $training_script" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### a. Run a single-node TensorFlow experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To facilitate ML training, the Python SDK provides a high-level abstraction called Estimators that allows users to train CNTK, TensorFlow, or custom scripts in the Azure ML ecosystem. Let's instantiate an AML TensorFlow Estimator (not to be conflated with the [`tf.estimator.Estimator`](#https://www.tensorflow.org/programmers_guide/estimators) class)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "\n", + "script_params = {\n", + "    '--minibatch_size': 64,\n", + "    '--learning_rate': 0.001,\n", + "    '--keep_probability': 0.5,\n", + "    '--output_dir': 'outputs',\n", + "    '--num_iterations': 1000\n", + "}\n", + "\n", + "tf_estimator = TensorFlow(source_directory = script_folder, \n", + "                          script_params = script_params, \n", + "                          compute_target = compute_target, \n", + "                          entry_script = training_script, \n", + "                          node_count = 1,\n", + "                          use_gpu = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We specify the following parameters to the TensorFlow constructor:\n", + "* `script_params`: a dictionary specifying the command-line arguments to your `entry_script`\n", + "* `compute_target`: the compute target object. Can be a local, DSVM, or Batch AI compute target\n", + "* `entry_script`: the path, relative to the project directory, of the file to be executed during training\n", + "* `node_count`: the number of nodes to use for the training job. Defaults to `1`\n", + "* `use_gpu`: to leverage the GPU for training, set this flag to `True`. Defaults to `False`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note on the `outputs` folder:**\n", + "\n", + "When running an experiment using the Python SDK, you can write files out to a folder named `outputs` that is relative to the root directory. This folder is specially tracked by AML in the sense that any files written to that folder during script execution will be picked up by Run History; these files (known as *artifacts*) will be available as part of the run history record." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "batchai", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "run = experiment.submit(tf_estimator)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### b. Monitoring the training run\n", + "There are several ways for the user to monitor the details and status of the training run.
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Browse to the run history report (use Chrome please, for now)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print out the current run status" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also use a widget to monitor the progress of your submitted run, which allows you to do so without blocking your notebook execution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "use notebook widget" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![img](../images/hd_tf1.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "batchai", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "# to block and wait for training to complete \n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also check on the Batch AI cluster and job status using `az-cli` commands:\n", + "```shell\n", + "# check cluster status. You can see how many nodes are running.\n", + "$ az batchai cluster list\n", + "\n", + "# check job status. You can see how many jobs are running\n", + "$ az batchai job list\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### c. Log metrics to Run History\n", + "\n", + "Another useful feature of the Python SDK is the ability to log metrics for each run. These metrics are persisted in the run history by AML. In addition, they are automatically displayed and visualized by the RunDetails widget. (Logging run metrics is also required in order to use the HyperDrive service, which we will go over in more detail in section 4.)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The below code snippet from `tf_mnist_train.py` shows how we can we log the script parameters for a training run, by specifying a key for the metric and the corresponding value:\n", + "```python\n", + "from azureml.core.run import Run\n", + "\n", + "run_logger = Run.get_submitted_run()\n", + "run_logger.log(\"learning_rate\", args.learning_rate)\n", + "run_logger.log(\"minibatch_size\", args.minibatch_size)\n", + "run_logger.log(\"keep_probability\", args.keep_probability)\n", + "run_logger.log(\"num_iterations\", args.num_iterations)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "get metrics" + ] + }, + "outputs": [], + "source": [ + "run.get_metrics()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Hyperparameter Tuning with HyperDrive\n", + "\n", + "Now that we've seen how to do a simple TensorFlow training run using the Python SDK, let's see if we can further improve the accuracy of our model.\n", + "\n", + "Hyperparameter tuning is a key part of machine learning experimentation, in which the data scientist tries different configurations of hyperparameters in order to find a set of values that optimizes a specific target metric, such as the accuracy of the model. 
To this end, Azure ML provides the **HyperDrive service** to facilitate the hyperparameter tuning process." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### a. Start a HyperDrive run\n", + "\n", + "Using HyperDrive, we specify the hyperparameter space to sweep over, the primary metric to optimize, and an early termination policy. HyperDrive will kick off multiple child runs with different hyperparameter configurations, and terminate underperforming runs according to the early termination policy provided." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive import *\n", + "\n", + "param_sampling = RandomParameterSampling( {\n", + "        \"learning_rate\": loguniform(-10, -3),\n", + "        \"keep_probability\": uniform(0.1, 0.5)\n", + "    }\n", + ")\n", + "\n", + "early_termination_policy = BanditPolicy(slack_factor = 0.15, evaluation_interval=2)\n", + "\n", + "hyperdrive_run_config = HyperDriveRunConfig(estimator = tf_estimator, \n", + "                                            hyperparameter_sampling = param_sampling, \n", + "                                            policy = early_termination_policy,\n", + "                                            primary_metric_name = \"Accuracy\",\n", + "                                            primary_metric_goal = PrimaryMetricGoal.MAXIMIZE,\n", + "                                            max_total_runs = 20,\n", + "                                            max_concurrent_runs = 4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the above cell, we first define a sampling space for the hyperparameters we want to sweep over, specifically the `learning_rate` and `keep_probability`. In this case we are using `RandomParameterSampling`, which allows us to specify the parameter values as either a choice among discrete values or as a distribution over a continuous range (here, we are using a uniform distribution over [0.1, 0.5] for the `keep_probability`). You can run `help(RandomParameterSampling)` for more API details on this class.\n", + "\n", + "Then, we specify the early termination policy to use. If not specified, the policy defaults to `None`, in which case all training runs are run to completion. Here we use the `BanditPolicy`, which will terminate any run that doesn't fall within the slack factor of our primary evaluation metric. Run `help(BanditPolicy)` for more details on this policy.\n", + "\n", + "The `evaluation_interval` determines how often the policy is applied, counted in intervals of the training script reporting the primary metric; with `evaluation_interval=2`, each run is evaluated against the policy every second time our script logs its `'Accuracy'` metric.\n", + "\n", + "We specify the following parameters to the `HyperDriveRunConfig` constructor:\n", + "* `estimator`: the estimator that will be called with the sampled hyperparameters\n", + "* `hyperparameter_sampling`: the sampling space to use\n", + "* `policy`: the early termination policy\n", + "* `primary_metric_name`: the name of the metric logged to the AML Run that HyperDrive will use to evaluate runs. Here, we are using the test accuracy (logged as 'Accuracy' in our training script)\n", + "* `primary_metric_goal`: the optimization goal of the primary metric (either `PrimaryMetricGoal.MAXIMIZE` or `PrimaryMetricGoal.MINIMIZE`)\n", + "* `max_total_runs`: the maximum number of runs HyperDrive will kick off\n", + "* `max_concurrent_runs`: the maximum number of runs to run concurrently\n", + "* `compute_target`: the compute target. In our case, the Batch AI cluster we provisioned" + ] + },
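+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The same API also supports purely discrete sweeps. As a minimal sketch (not part of this tutorial's sweep; `choice` comes from the same `azureml.train.hyperdrive` import), the learning rate could instead be restricted to an explicit list of values:\n", + "```python\n", + "# illustrative only: sample learning_rate from a fixed list of candidates\n", + "param_sampling = RandomParameterSampling( {\n", + "        \"learning_rate\": choice(0.0001, 0.001, 0.01),\n", + "        \"keep_probability\": uniform(0.1, 0.5)\n", + "    }\n", + ")\n", + "```" + ] + },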
+ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note on logging metrics for HyperDrive:**\n", + "\n", + "In order to use HyperDrive, we will need to log the metric we want the service to use for evaluating run performance (`primary_metric_name`). In our script, we will use the accuracy of the model evaluated on the MNIST test dataset as our primary metric. For every 100 training iterations, we calculate and log this test accuracy (`'Accuracy'`). We also log an additional utility metric, `'Iterations'`, to inform us of the number of iterations the model was trained on that corresponds to each Accuracy metric logged (see `tf_mnist_train.py` for more details). This is useful for seeing how many iterations were trained for jobs that were terminated early.\n", + "\n", + "```python\n", + "run_logger.log(\"Accuracy\", float(test_acc))\n", + "run_logger.log(\"Iterations\", i)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hyperdrive run", + "batchai", + "tensorflow" + ] + }, + "outputs": [], + "source": [ + "# start the HyperDrive run\n", + "hyperdrive_run = experiment.submit(hyperdrive_run_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "hyperdrive_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### b. Use a widget to visualize details of the HyperDrive runs\n", + "\n", + "Runs will automatically start to show in the following widget once rendered." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "use notebook widget" + ] + }, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "\n", + "RunDetails(hyperdrive_run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![img](../images/hd_tf2.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# these 2 cells to be replaced with Python calls once DCR 246363 has been completed\n", + "# check cluster status, pay attention to the # of running nodes\n", + "!az batchai cluster list -o table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# check the Batch AI job queue. Notice the Job name is the run history ID. \n", + "# Pay attention to the state of the job.\n", + "!az batchai job list -o table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### c. Find the best run\n", + "\n", + "Once all of the HyperDrive runs have completed, we can find the run that achieved the highest accuracy and its corresponding hyperparameters.\n",
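+ "\n", + "Depending on your SDK version, the HyperDrive run may also expose a convenience method that returns the best child run directly (an assumption; check your build before relying on it):\n", + "```python\n", + "best_run = hyperdrive_run.get_best_run_by_primary_metric()\n", + "```\n", + "Below, we instead tabulate the child runs ourselves and pick the best one explicitly:"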
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history", + "get metrics" + ] + }, + "outputs": [], + "source": [ + "import helpers  # small utility module that accompanies these sample notebooks\n", + "from tqdm import tqdm\n", + "\n", + "table = helpers.ListTable()\n", + "run_metrics = {}\n", + "table.append(['Accuracy', 'Run', 'Iterations', 'learning_rate', 'keep_probability'])\n", + "for run in tqdm(hyperdrive_run.get_children()):\n", + "    metrics = run.get_metrics()\n", + "    if 'Accuracy' in metrics.keys():\n", + "        metrics['Accuracy'] = metrics['Accuracy'][-1] # final test accuracy\n", + "        metrics['Iterations'] = max(metrics['Iterations']) # number of iterations the run ran for\n", + "        \n", + "        table.append([metrics['Accuracy'], \n", + "                      run.id, \n", + "                      metrics['Iterations'], \n", + "                      metrics['learning_rate'], \n", + "                      metrics['keep_probability']])\n", + "        run_metrics[run.id] = metrics\n", + "table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.run import Run\n", + "\n", + "best_run_id = max(run_metrics, key = lambda k: run_metrics[k]['Accuracy'])\n", + "best_run_metrics = run_metrics[best_run_id]\n", + "experiment = Experiment(ws, experiment_name)\n", + "best_run = Run(experiment, best_run_id)\n", + "\n", + "print('Best Run is:\\n  Accuracy: {0:.6f} \\n  Learning rate: {1:.6f} \\n  Keep probability: {2}'.format(\n", + "        best_run_metrics['Accuracy'],\n", + "        best_run_metrics['learning_rate'],\n", + "        best_run_metrics['keep_probability']\n", + "    ))\n", + "\n", + "print(helpers.get_run_history_url(best_run))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plot the runs [Optional]\n", + "Note: you will need to install `matplotlib` for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plot_data = np.array([[run_metrics[i]['keep_probability'], \n", + "                       run_metrics[i]['learning_rate'], \n", + "                       run_metrics[i]['Accuracy']] for i in run_metrics.keys()])\n", + "area = np.array([[run_metrics[i]['Iterations']/5] for i in run_metrics.keys()])\n", + "\n", + "plt.figure(figsize = (15,5))\n", + "plt.scatter(plot_data[:,0], plot_data[:,1], s = area, c = plot_data[:,2], alpha = 0.4)\n", + "plt.xlabel(\"keep_probability\")\n", + "plt.ylabel(\"learning_rate\")\n", + "plt.yscale('log')\n", + "plt.ylim(0.00001,0.06)\n", + "plt.colorbar()\n", + "plt.clim(0.95, max(plot_data[:,2]))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### d. Download model from the best run\n", + "Once we've identified the best run from HyperDrive, we can download the model files to our local machine. \n", + "\n", + "The final trained model checkpoint files are located in the `outputs` directory picked up by AML. We can run the line of code below to confirm that those files are present:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "best_run.get_file_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can download the relevant checkpoint files. Note: there is currently a bug in uploading files when executing on a Batch AI cluster, so the code below does not work yet."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "download file" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "output_dir = 'outputs'\n", + "target_dir = os.path.join('sample_projects', 'outputs')\n", + "model_files_to_download = ['checkpoint', 'model.ckpt.data-00000-of-00001', 'model.ckpt.index', 'model.ckpt.meta']\n", + "for file in model_files_to_download:\n", + "    model_src_path = os.path.join(output_dir, file)\n", + "    model_dest_path = os.path.join(target_dir, file)\n", + "    print('downloading ' + file)\n", + "    best_run.download_file(name = model_src_path, output_file_path = model_dest_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### e. Test the model locally\n", + "Now that we have downloaded the best-performing model, we can use it locally to score images of hand-written digits. For this we have prepared a scoring file [tf_mnist_score.py](tf_mnist_score.py), which we import below. `tf_mnist_score.py` provides a function `run(input_data)` which accepts a base64-encoded image in a JSON dict format (this format is friendly for the deployment of a webservice, which we will do later). \n", + "\n", + "Note that this scoring code requires tensorflow and PIL (`pip install tensorflow pillow`).\n", + "\n", + "First, we will create a base64-encoded image in a JSON structure based on one of the test images provided in the folder `mnist_test_images`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os, json, base64\n", + "from PIL import Image \n", + "import tf_mnist_score\n", + "from io import BytesIO\n", + "\n", + "def imgToBase64(img):\n", + "    imgio = BytesIO()\n", + "    img.save(imgio, 'JPEG')\n", + "    img_str = base64.b64encode(imgio.getvalue())\n", + "    return img_str.decode('utf-8')\n", + "\n", + "# Generate JSON Base64-encoded image from sample test input\n", + "test_img_path = os.path.join('mnist_test_images', 'img_3.jpg')\n", + "base64Img = imgToBase64(Image.open(test_img_path))\n", + "data = json.dumps({'data': base64Img})\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we will call `tf_mnist_score.run()` with the JSON data structure we created above. We also display the image that we are scoring, so we can compare the label returned by the model with the actual handwritten digit.\n",
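+ "\n", + "The `init()`/`run(input_data)` pair in the scoring file follows the contract that Azure ML scoring webservices expect. As a sketch of its general shape (the actual details live in `tf_mnist_score.py`):\n", + "```python\n", + "def init():\n", + "    # called once per process: load the trained model checkpoint into a global\n", + "    ...\n", + "\n", + "def run(input_data):\n", + "    # called per request: decode the base64 image, predict, return a JSON-serializable label\n", + "    ...\n", + "```"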
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image as IPImage\n", + "tf_mnist_score.init()\n", + "result = tf_mnist_score.run(data)\n", + "print(result)\n", + "IPImage(filename=test_img_path, width=200)" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "208cc3b53e2c45fea1440188a863efb8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "DOMWidgetModel", + "state": { + "_model_name": "DOMWidgetModel", + "_view_module": "azureml_contrib_widgets", + "_view_module_version": "^0.1.0", + "_view_name": "ShowHyperDriveRunsView", + "layout": "IPY_MODEL_8a36279a14624bbdb1926c2572748861", + "value": { + "child_runs": [ + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000852395056049516 --keep_probability 0.434530370965995", + "created_time": "2018-06-01 19:41:39.666220+00:00", + "created_time_dt": "2018-06-01T19:41:39.666220", + "duration": "0:00:57", + "end_time": "2018-06-01 19:42:37.511351+00:00", + "hyperdrive_id": "5488", + "metric": 0.9842000007629395, + "paras_keep_probability": "0.434530370965995", + "paras_learning_rate": "0.000852395056049516", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5488_abbecb6c", + "run_number": 16, + "start_time": "2018-06-01 19:41:40.368621+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00179999057463703 --keep_probability 0.296515321882523", + "created_time": "2018-06-01 19:29:46.303636+00:00", + "created_time_dt": "2018-06-01T19:29:46.303636", + "duration": "0:02:03", + "end_time": "2018-06-01 19:31:50.043486+00:00", + "hyperdrive_id": "5469", + "metric": 0.9836999773979187, + "paras_keep_probability": "0.296515321882523", + "paras_learning_rate": "0.00179999057463703", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5469_9f034e69", + "run_number": 10, + "start_time": "2018-06-01 19:29:47.033264+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000676904386677712 --keep_probability 0.4154535083569", + "created_time": "2018-06-01 19:50:31.651044+00:00", + "created_time_dt": 
"2018-06-01T19:50:31.651044", + "duration": "0:14:30", + "end_time": "2018-06-01 20:05:02.591649+00:00", + "hyperdrive_id": "5498", + "metric": 0.9828000068664551, + "paras_keep_probability": "0.4154535083569", + "paras_learning_rate": "0.000676904386677712", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5498_32e0a249", + "run_number": 20, + "start_time": "2018-06-01 19:50:37.386350+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000586938713321222 --keep_probability 0.432942295536284", + "created_time": "2018-06-01 19:37:36.678691+00:00", + "created_time_dt": "2018-06-01T19:37:36.678691", + "duration": "0:01:46", + "end_time": "2018-06-01 19:39:23.211000+00:00", + "hyperdrive_id": "5479", + "metric": 0.9818000197410583, + "paras_keep_probability": "0.432942295536284", + "paras_learning_rate": "0.000586938713321222", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5479_cb5037ed", + "run_number": 11, + "start_time": "2018-06-01 19:37:43.143211+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000321696353537414 --keep_probability 0.446837800410634", + "created_time": "2018-06-01 19:41:39.915872+00:00", + "created_time_dt": "2018-06-01T19:41:39.915872", + "duration": "0:02:58", + "end_time": "2018-06-01 19:44:38.693923+00:00", + "hyperdrive_id": "5490", + "metric": 0.9812999963760376, + "paras_keep_probability": "0.446837800410634", + "paras_learning_rate": "0.000321696353537414", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5490_cfcbcea1", + "run_number": 17, + "start_time": "2018-06-01 19:41:40.688804+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.000598930751146987 --keep_probability 0.173175740602207", + "created_time": "2018-06-01 19:41:44.682554+00:00", + "created_time_dt": "2018-06-01T19:41:44.682554", + "duration": "0:01:54", + "end_time": "2018-06-01 19:43:38.690104+00:00", + "hyperdrive_id": "5491", + "metric": 0.9785000085830688, + "paras_keep_probability": "0.173175740602207", + "paras_learning_rate": "0.000598930751146987", + "paras_minibatch_size": "64", + "paras_num_iterations": "1000", + "paras_output_dir": "outputs", + "run_id": "tensorflow-hyperdrive_1527881081325_446_5491_1ab60563", + "run_number": 18, + "start_time": "2018-06-01 19:41:45.356160+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i 
[Saved `azureml_contrib_widgets` widget state omitted here; it serialized two interactive run-details views and repeated the same child-run records several times over. The recoverable information:

* HyperDrive parent run `tensorflow-hyperdrive_1527881081325` (script `tf_mnist_train.py`, target `gpucluster`): a RANDOM sweep over `learning_rate` (loguniform, bounds [-10, -3]) and `keep_probability` (uniform, recorded bounds [0.5, 0.1]), with a BANDIT early-termination policy (slack_factor 0.15, evaluation_interval 2, delay_evaluation 0), max_total_jobs 20, max_concurrent_jobs 4, and primary metric Accuracy (maximize). The best child run reached Accuracy 0.9842 (learning_rate ~0.000852, keep_probability ~0.435); two child runs diverged and stalled near Accuracy 0.11 to 0.12.
* Single run `tensorflow-hyperdrive_1527879977658` (learning_rate 0.001, keep_probability 0.5, minibatch_size 64, num_iterations 1000): final test accuracy 0.9836.]
tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.00737747352627753 --keep_probability 0.205239625544216", + "created_time": "2018-06-01 19:50:33.596963+00:00", + "created_time_dt": "2018-06-01T19:50:33.596963", + "duration": "0:01:51", + "end_time": "2018-06-01 19:52:25.281499+00:00", + "hyperdrive_id": "5497", + "metric": 0.9581999778747559, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5497_8130025b", + "run_number": 22, + "start_time": "2018-06-01 19:50:34.456850+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0211316024512922 --keep_probability 0.456008246140918", + "created_time": "2018-06-01 19:50:31.581841+00:00", + "created_time_dt": "2018-06-01T19:50:31.581841", + "duration": "0:01:03", + "end_time": "2018-06-01 19:51:35.272415+00:00", + "hyperdrive_id": "5499", + "metric": 0.9580000042915344, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5499_e0b5a73f", + "run_number": 19, + "start_time": "2018-06-01 19:50:32.786951+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 7.56451710371043E-05 --keep_probability 0.321364540919092", + "created_time": "2018-06-01 19:50:33.421674+00:00", + "created_time_dt": "2018-06-01T19:50:33.421674", + "duration": "0:06:27", + "end_time": "2018-06-01 19:57:00.982688+00:00", + "hyperdrive_id": "5496", + "metric": 0.9520000219345093, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5496_46a98c1f", + "run_number": 21, + "start_time": "2018-06-01 19:50:34.379782+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 6.86923046964849E-05 --keep_probability 0.229123758955098", + "created_time": "2018-06-01 19:37:36.816510+00:00", + "created_time_dt": "2018-06-01T19:37:36.816510", + "duration": "0:01:12", + "end_time": "2018-06-01 19:38:49.439465+00:00", + "hyperdrive_id": "5477", + "metric": 0.9483000040054321, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5477_c428bcf0", + "run_number": 13, + "start_time": "2018-06-01 19:37:42.971387+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.014609502490554 --keep_probability 0.480459935106515", + "created_time": "2018-06-01 19:26:10.258955+00:00", + "created_time_dt": "2018-06-01T19:26:10.258955", + "duration": "0:02:41", + "end_time": "2018-06-01 19:28:52.069673+00:00", + "hyperdrive_id": "5458", + "metric": 0.12110000103712082, + "run_id": 
"tensorflow-hyperdrive_1527881081325_446_5458_3f73f0ac", + "run_number": 5, + "start_time": "2018-06-01 19:26:17.107379+00:00", + "status": "Completed" + }, + { + "arguments": "azureml-setup/context_manager_injector.py -i OutputCollection:context_managers.RunHistory -i DaskOnBatch:context_managers.DaskOnBatch tf_mnist_train.py --minibatch_size 64 --learning_rate 0.001 --keep_probability 0.5 --output_dir outputs --num_iterations 1000 --learning_rate 0.0149932664638274 --keep_probability 0.284424630578217", + "created_time": "2018-06-01 19:37:36.730460+00:00", + "created_time_dt": "2018-06-01T19:37:36.730460", + "duration": "0:01:08", + "end_time": "2018-06-01 19:38:44.881339+00:00", + "hyperdrive_id": "5478", + "metric": 0.11349999904632568, + "run_id": "tensorflow-hyperdrive_1527881081325_446_5478_24390740", + "run_number": 12, + "start_time": "2018-06-01 19:37:42.865594+00:00", + "status": "Completed" + } + ], + "children_metrics": { + "categories": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ], + "metricName": "Accuracy", + "series": [ + { + "data": [ + 0.11159999668598175, + 0.8949000239372253, + 0.9286999702453613, + 0.9498999714851379, + 0.9539999961853027, + 0.953499972820282, + 0.9606999754905701, + 0.9613000154495239, + 0.9549999833106995, + 0.9646999835968018, + 0.9581999778747559 + ], + "mode": "lines", + "name": 3, + "run_id": 3, + "stepped": false + }, + { + "data": [ + 0.10109999775886536, + 0.8985999822616577, + 0.9424999952316284, + 0.9294999837875366, + 0.9545999765396118, + 0.9581000208854675, + 0.9616000056266785, + 0.9678999781608582, + 0.9661999940872192, + 0.9672999978065491, + 0.968500018119812 + ], + "mode": "lines", + "name": 4, + "run_id": 4, + "stepped": false + }, + { + "data": [ + 0.08980000019073486, + 0.8791999816894531, + 0.9261000156402588, + 0.9426000118255615, + 0.9501000046730042, + 0.9546999931335449, + 0.9573000073432922, + 0.963699996471405, + 0.965499997138977, + 0.9684000015258789, + 0.9703999757766724 + ], + "mode": "lines", + "name": 6, + "run_id": 6, + "stepped": false + }, + { + "data": [ + 0.12110000103712082, + 0.0982000008225441, + 0.11349999904632568, + 0.11349999904632568, + 0.10320000350475311, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568 + ], + "mode": "lines", + "name": 5, + "run_id": 5, + "stepped": false + }, + { + "data": [ + 0.14800000190734863, + 0.9146999716758728, + 0.9452999830245972, + 0.9506000280380249, + 0.9550999999046326, + 0.9584000110626221, + 0.9599000215530396, + 0.9621000289916992, + 0.964900016784668, + 0.9510999917984009, + 0.9624000191688538 + ], + "mode": "lines", + "name": 9, + "run_id": 9, + "stepped": false + }, + { + "data": [ + 0.08749999850988388, + 0.855400025844574, + 0.911300003528595, + 0.9289000034332275, + 0.9394999742507935, + 0.9431999921798706, + 0.9509999752044678, + 0.9553999900817871, + 0.9555000066757202, + 0.963699996471405, + 0.9616000056266785 + ], + "mode": "lines", + "name": 7, + "run_id": 7, + "stepped": false + }, + { + "data": [ + 0.11209999769926071, + 0.9466000199317932, + 0.9639000296592712, + 0.9722999930381775, + 0.9781000018119812, + 0.9800999760627747, + 0.9781000018119812, + 0.9814000129699707, + 0.9833999872207642, + 0.9836999773979187, + 0.9829000234603882 + ], + "mode": "lines", + "name": 10, + "run_id": 10, + "stepped": false + }, + { + "data": [ + 0.0949999988079071, + 0.8953999876976013, + 0.942300021648407, + 0.9513999819755554, + 0.9617000222206116, + 
0.9564999938011169, + 0.9689000248908997, + 0.9688000082969666, + 0.9704999923706055, + 0.9760000109672546, + 0.9764000177383423 + ], + "mode": "lines", + "name": 8, + "run_id": 8, + "stepped": false + }, + { + "data": [ + 0.08950000256299973, + 0.9248999953269958, + 0.9545999765396118, + 0.9581000208854675, + 0.9559999704360962, + 0.9627000093460083, + 0.9642000198364258, + 0.9663000106811523, + 0.9585999846458435, + 0.963100016117096, + 0.9595999717712402 + ], + "mode": "lines", + "name": 14, + "run_id": 14, + "stepped": false + }, + { + "data": [ + 0.09629999846220016, + 0.11349999904632568, + 0.10279999673366547, + 0.10090000182390213, + 0.11349999904632568, + 0.11349999904632568, + 0.11349999904632568, + 0.10279999673366547, + 0.10279999673366547, + 0.11349999904632568, + 0.11349999904632568 + ], + "mode": "lines", + "name": 12, + "run_id": 12, + "stepped": false + }, + { + "data": [ + 0.13899999856948853, + 0.7547000050544739, + 0.8593000173568726, + 0.8981999754905701, + 0.9186999797821045, + 0.9254999756813049, + 0.934499979019165, + 0.9377999901771545, + 0.9431999921798706, + 0.9437999725341797, + 0.9483000040054321 + ], + "mode": "lines", + "name": 13, + "run_id": 13, + "stepped": false + }, + { + "data": [ + 0.10440000146627426, + 0.9383000135421753, + 0.9538999795913696, + 0.9664000272750854, + 0.9717000126838684, + 0.9760000109672546, + 0.9732999801635742, + 0.9779000282287598, + 0.9817000031471252, + 0.9818000197410583, + 0.9799000024795532 + ], + "mode": "lines", + "name": 11, + "run_id": 11, + "stepped": false + }, + { + "data": [ + 0.08869999647140503, + 0.9404000043869019, + 0.9574000239372253, + 0.9700999855995178, + 0.9715999960899353, + 0.979200005531311, + 0.9797000288963318, + 0.9812999963760376, + 0.9797999858856201, + 0.9817000031471252, + 0.9842000007629395 + ], + "mode": "lines", + "name": 16, + "run_id": 16, + "stepped": false + }, + { + "data": [ + 0.12210000306367874, + 0.917900025844574, + 0.9437999725341797, + 0.95660001039505, + 0.9646000266075134, + 0.9692000150680542, + 0.9707000255584717, + 0.9735999703407288, + 0.9781000018119812, + 0.9775999784469604, + 0.9785000085830688 + ], + "mode": "lines", + "name": 18, + "run_id": 18, + "stepped": false + }, + { + "data": [ + 0.11159999668598175, + 0.8270000219345093, + 0.9024999737739563, + 0.9175999760627747, + 0.9323999881744385, + 0.9368000030517578, + 0.9431999921798706, + 0.9506000280380249, + 0.9526000022888184, + 0.9584000110626221, + 0.9613000154495239 + ], + "mode": "lines", + "name": 15, + "run_id": 15, + "stepped": false + }, + { + "data": [ + 0.16419999301433563, + 0.9247999787330627, + 0.9431999921798706, + 0.9599000215530396, + 0.9674999713897705, + 0.9695000052452087, + 0.9765999913215637, + 0.9763000011444092, + 0.9779999852180481, + 0.9812999963760376, + 0.9789000153541565 + ], + "mode": "lines", + "name": 17, + "run_id": 17, + "stepped": false + }, + { + "data": [ + 0.1160999983549118, + 0.9366000294685364, + 0.9473000168800354, + 0.9552000164985657, + 0.95660001039505, + 0.9465000033378601, + 0.9575999975204468, + 0.9580000042915344, + 0.949999988079071, + 0.9520999789237976, + 0.9567999839782715 + ], + "mode": "lines", + "name": 19, + "run_id": 19, + "stepped": false + }, + { + "data": [ + 0.0949999988079071, + 0.8639000058174133, + 0.9139999747276306, + 0.9171000123023987, + 0.930899977684021, + 0.9441999793052673, + 0.9545999765396118, + 0.9560999870300293, + 0.9581999778747559, + 0.9539999961853027, + 0.9559000134468079 + ], + "mode": "lines", + "name": 22, + "run_id": 22, + "stepped": 
false + }, + { + "data": [ + 0.11500000208616257, + 0.789900004863739, + 0.8838000297546387, + 0.9072999954223633, + 0.9203000068664551, + 0.9312000274658203, + 0.9319000244140625, + 0.9434000253677368, + 0.9470999836921692, + 0.9477999806404114, + 0.9520000219345093 + ], + "mode": "lines", + "name": 21, + "run_id": 21, + "stepped": false + }, + { + "data": [ + 0.052000001072883606, + 0.9318000078201294, + 0.9584000110626221, + 0.9639999866485596, + 0.9710999727249146, + 0.9746999740600586, + 0.9768999814987183, + 0.9822999835014343, + 0.978600025177002, + 0.9801999926567078, + 0.9828000068664551 + ], + "mode": "lines", + "name": 20, + "run_id": 20, + "stepped": false + } + ], + "showLegend": true + }, + "run_id": "tensorflow-hyperdrive_1527881081325", + "run_logs": "", + "run_metrics": [], + "run_properties": { + "additional_properties": {}, + "created_utc": "2018-06-01T19:24:41.846775", + "description": null, + "end_time_utc": "2018-06-01T20:05:15.398835", + "experiment_id": "54fc7a8b-21a4-4a10-8931-bd36c717c9b7", + "heartbeat_enabled": false, + "hidden": false, + "name": "tensorflow-hyperdrive", + "parent_run_id": null, + "properties": { + "all_jobs_generated": "true", + "azureml.runsource": "hyperdrive", + "cancellation_requested": "false", + "generator_config": "{\"name\": \"RANDOM\", \"parameter_space\": {\"learning_rate\": [\"loguniform\", [-10, -3]], \"keep_probability\": [\"uniform\", [0.5, 0.1]]}}", + "is_hyperdrive_run": "true", + "max_concurrent_jobs": "4", + "max_duration_minutes": "43200", + "max_total_jobs": "20", + "policy_config": "{\"name\": \"BANDIT\", \"properties\": {\"slack_factor\": 0.15, \"evaluation_interval\": 2, \"delay_evaluation\": 0}}", + "primary_metric_config": "{\"name\": \"Accuracy\", \"goal\": \"maximize\"}", + "runTemplate": "HyperDrive" + }, + "root_run_id": "tensorflow-hyperdrive_1527881081325", + "run_id": "tensorflow-hyperdrive_1527881081325", + "run_number": 2, + "script_name": "tf_mnist_train.py", + "start_time_utc": null, + "status": "Completed", + "tags": {}, + "target": "gpucluster", + "token": null, + "token_expiry_time_utc": null, + "user_id": "fffc1c66-275f-4935-bb04-70a760c82fda" + }, + "status": "Completed", + "workbench_run_details_uri": "https://mlworkbench.azureml-test.net/home/%2Fsubscriptions%2Ffac34303-435d-4486-8c3f-7094d82a0b60%2FresourceGroups%2Faml-notebooks%2Fproviders%2FMicrosoft.MachineLearningServices%2Fworkspaces%2Fhaieastus2ws3/projects/tensorflow-hyperdrive/run-history/run-details/tensorflow-hyperdrive_1527881081325?type=HyperDrive" + } + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_1.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_1.jpg new file mode 100644 index 000000000..b709a206b Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_1.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_10.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_10.jpg new file mode 100644 index 000000000..40a9004c3 Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_10.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_2.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_2.jpg new file mode 100644 index 000000000..4fbc86a06 Binary files 
/dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_2.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_3.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_3.jpg new file mode 100644 index 000000000..29e336c7c Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_3.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_4.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_4.jpg new file mode 100644 index 000000000..49773f93a Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_4.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_5.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_5.jpg new file mode 100644 index 000000000..168772f87 Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_5.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_6.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_6.jpg new file mode 100644 index 000000000..b53650d3f Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_6.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_7.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_7.jpg new file mode 100644 index 000000000..0eba1bd22 Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_7.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_8.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_8.jpg new file mode 100644 index 000000000..952318928 Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_8.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_9.jpg b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_9.jpg new file mode 100644 index 000000000..9fcfdc1d5 Binary files /dev/null and b/00.Getting Started/08.hyperdrive-with-TensorFlow/mnist_test_images/img_9.jpg differ diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_score.py b/00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_score.py new file mode 100644 index 000000000..ba80bd364 --- /dev/null +++ b/00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_score.py @@ -0,0 +1,103 @@ +from __future__ import print_function +import tensorflow as tf +import numpy as np +import os +import json +import base64 +from io import BytesIO +from PIL import Image + +############################################## +# helper functions +############################################## + + +def build_model(x, y_, keep_prob): + def weight_variable(shape): + initial = tf.truncated_normal(shape, stddev=0.1) + return tf.Variable(initial) + + def bias_variable(shape): + initial = tf.constant(0.1, shape=shape) + return tf.Variable(initial) + + def conv2d(x, W): + return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + + def max_pool_2x2(x): + return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') + + W_conv1 = weight_variable([5, 5, 1, 32]) + b_conv1 = bias_variable([32]) + + x_image = 
tf.reshape(x, [-1, 28, 28, 1]) + + h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) + h_pool1 = max_pool_2x2(h_conv1) + + W_conv2 = weight_variable([5, 5, 32, 64]) + b_conv2 = bias_variable([64]) + + h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) + h_pool2 = max_pool_2x2(h_conv2) + + W_fc1 = weight_variable([7 * 7 * 64, 1024]) + b_fc1 = bias_variable([1024]) + + h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) + h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) + + h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) + + W_fc2 = weight_variable([1024, 10]) + b_fc2 = bias_variable([10]) + + y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 + + return y_conv + + +def base64ToImg(base64ImgString): + if base64ImgString.startswith('b\''): + base64ImgString = base64ImgString[2:-1] + base64Img = base64ImgString.encode('utf-8') + decoded_img = base64.b64decode(base64Img) + img_buffer = BytesIO(decoded_img) + img = Image.open(img_buffer) + return img + +############################################## +# API init() and run() methods +############################################## + + +def init(): + global x, keep_prob, y_conv, sess + g = tf.Graph() + with g.as_default(): + x = tf.placeholder(tf.float32, shape=[None, 784]) + y_ = tf.placeholder(tf.float32, shape=[None, 10]) + keep_prob = tf.placeholder(tf.float32) + y_conv = build_model(x, y_, keep_prob) + + saver = tf.train.Saver() + init_op = tf.global_variables_initializer() + + model_dir = os.path.join('sample_projects', 'outputs') + saved_model_path = os.path.join(model_dir, 'model.ckpt') + + sess = tf.Session(graph=g) + sess.run(init_op) + saver.restore(sess, saved_model_path) + + +def run(input_data): + img = base64ToImg(json.loads(input_data)['data']) + img_data = np.array(img, dtype=np.float32).flatten() + img_data.resize((1, 784)) + + y_pred = sess.run(y_conv, feed_dict={x: img_data, keep_prob: 1.0}) + predicted_label = np.argmax(y_pred[0]) + + outJsonString = json.dumps({"label": str(predicted_label)}) + return str(outJsonString) diff --git a/00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_train.py b/00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_train.py new file mode 100644 index 000000000..f8a6c8919 --- /dev/null +++ b/00.Getting Started/08.hyperdrive-with-TensorFlow/tf_mnist_train.py @@ -0,0 +1,151 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +# Load MNIST Data +from tensorflow.examples.tutorials.mnist import input_data +import os +import argparse + +from azureml.core.run import Run + +# the following 10 lines can be removed once BUG# 241943 is fixed + + +def get_logger(): + try: + return Run.get_submitted_run() + except Exception: + return LocalLogger() + + +class LocalLogger: + def log(self, key, value): + print("AML-Log:", key, value) + + +def build_model(x, y_, keep_prob): + def weight_variable(shape): + initial = tf.truncated_normal(shape, stddev=0.1) + return tf.Variable(initial) + + def bias_variable(shape): + initial = tf.constant(0.1, shape=shape) + return tf.Variable(initial) + + def conv2d(x, W): + return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + + def max_pool_2x2(x): + return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') + + W_conv1 = weight_variable([5, 5, 1, 32]) + b_conv1 = bias_variable([32]) + + x_image = tf.reshape(x, [-1, 28, 28, 1]) + + h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) + h_pool1 = max_pool_2x2(h_conv1) + + 
W_conv2 = weight_variable([5, 5, 32, 64]) + b_conv2 = bias_variable([64]) + + h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) + h_pool2 = max_pool_2x2(h_conv2) + + W_fc1 = weight_variable([7 * 7 * 64, 1024]) + b_fc1 = bias_variable([1024]) + + h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) + h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) + + h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) + + W_fc2 = weight_variable([1024, 10]) + b_fc2 = bias_variable([10]) + + y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 + + return y_conv + + +def main(): + # Get command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument('--learning_rate', type=float, + default=0.0001, help='learning rate') + parser.add_argument('--minibatch_size', type=int, + default=50, help='minibatch size') + parser.add_argument('--keep_probability', type=float, + default=0.5, help='keep probability for dropout layer') + parser.add_argument('--num_iterations', type=int, + default=1000, help='number of iterations') + parser.add_argument('--output_dir', type=str, default='./outputs', + help='output directory to write checkpoints to') + + args = parser.parse_args() + + # log parameters + run_logger = get_logger() + run_logger.log("learning_rate", args.learning_rate) + run_logger.log("minibatch_size", args.minibatch_size) + run_logger.log("keep_probability", args.keep_probability) + run_logger.log("num_iterations", args.num_iterations) + + # Load MNIST data + mnist = input_data.read_data_sets('MNIST_data', one_hot=True) + + sess = tf.InteractiveSession() + + x = tf.placeholder(tf.float32, shape=[None, 784]) + y_ = tf.placeholder(tf.float32, shape=[None, 10]) + keep_prob = tf.placeholder(tf.float32) + + y_conv = build_model(x, y_, keep_prob) + + cross_entropy = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)) + + train_step = tf.train.AdamOptimizer( + args.learning_rate).minimize(cross_entropy) + correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) + + accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) + sess.run(tf.global_variables_initializer()) + + for i in range(args.num_iterations): + batch = mnist.train.next_batch(args.minibatch_size) + if i % 100 == 0: + test_acc = accuracy.eval( + feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}) + train_accuracy = accuracy.eval( + feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0}) + print("step %d, training accuracy %g, test accuracy, %g" % + (i, train_accuracy, test_acc)) + + # log test accuracy to AML + run_logger.log("Accuracy", float(test_acc)) + run_logger.log("Iterations", i) + + sess.run(train_step, feed_dict={ + x: batch[0], y_: batch[1], keep_prob: args.keep_probability}) + + # Save the trained model + model_dir = args.output_dir + model_file = 'model.ckpt' + + os.makedirs(model_dir, exist_ok=True) + + saver = tf.train.Saver() + saver.save(sess, os.path.join(model_dir, model_file)) + + final_test_acc = sess.run(accuracy, feed_dict={ + x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}) + run_logger.log("Accuracy", float(final_test_acc)) + run_logger.log("Iterations", args.num_iterations) + print("test accuracy %g" % final_test_acc) + + +if __name__ == "__main__": + main() diff --git a/00.Getting Started/10.register-model-create-image-deploy-service/10.register-model-create-image-deploy-service.ipynb b/00.Getting Started/10.register-model-create-image-deploy-service/10.register-model-create-image-deploy-service.ipynb new file mode 100644 
index 000000000..c6fd8468c --- /dev/null +++ b/00.Getting Started/10.register-model-create-image-deploy-service/10.register-model-create-image-deploy-service.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 10. Register Model, Create Image and Deploy Service\n", + "\n", + "This example shows how to deploy a web service in a step-by-step fashion:\n", + "\n", + " 1. Register model\n", + " 2. Query versions of models and select one to deploy\n", + " 3. Create Docker image\n", + " 4. Query versions of images\n", + " 5. Deploy the image as a web service\n", + " \n", + "**IMPORTANT**:\n", + " * This notebook requires you to first complete the \"01.SDK-101-Train-and-Deploy-to-ACI.ipynb\" Notebook\n", + " \n", + "The 101 Notebook taught you how to deploy a web service directly from a model in one step. This Notebook shows a more advanced approach that gives you more control over model versions and Docker image versions. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Register Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can add tags and descriptions to your models. Note you need to have a `sklearn_regression_model.pkl` file in the current directory; it is included in the same folder as this notebook. The below call registers that file as a model with the same name `sklearn_regression_model.pkl` in the workspace.\n", + "\n", + "Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "register model from file" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.model import Model\n", + "import sklearn\n", + "\n", + "library_version = \"sklearn\"+sklearn.__version__.replace(\".\",\"x\")\n", + "\n", + "model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n", + " model_name = \"sklearn_regression_model.pkl\",\n", + " tags = {'area': \"diabetes\", 'type': \"regression\", 'version': library_version},\n", + " description = \"Ridge regression model to predict diabetes\",\n", + " workspace = ws)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can explore the registered models within your workspace and query by tag. Models are versioned. If you call the register_model command many times with the same model name, you will get multiple versions of the model with increasing version numbers." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "register model from file" + ] + }, + "outputs": [], + "source": [ + "regression_models = ws.models(tags=['area'])\n", + "for m in regression_models:\n", + " print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can pick a specific model to deploy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(model.name, model.description, model.version, sep = '\\t')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Docker Image" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show `score.py`. Note that the `sklearn_regression_model.pkl` in the `get_model_path` call is referring to the model named `sklearn_regression_model.pkl` registered in the workspace. It is NOT referencing the local file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile score.py\n", + "import pickle\n", + "import json\n", + "import numpy\n", + "from sklearn.externals import joblib\n", + "from sklearn.linear_model import Ridge\n", + "from azureml.core.model import Model\n", + "\n", + "def init():\n", + " global model\n", + " # note here \"sklearn_regression_model.pkl\" is the name of the model registered under the workspace\n", + " # this is a different behavior than before when the code is run locally, even though the code is the same.\n", + " model_path = Model.get_model_path('sklearn_regression_model.pkl')\n", + " # deserialize the model file back into a sklearn model\n", + " model = joblib.load(model_path)\n", + "\n", + "# note you can pass in multiple rows for scoring\n", + "def run(raw_data):\n", + " try:\n", + " data = json.loads(raw_data)['data']\n", + " data = numpy.array(data)\n", + " result = model.predict(data)\n", + " return json.dumps({\"result\": result.tolist()})\n", + " except Exception as e:\n", + " # return the error text so a bad request does not crash the service\n", + " return json.dumps({\"error\": str(e)})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.conda_dependencies import CondaDependencies \n", + "\n", + "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n", + "\n", + "with open(\"myenv.yml\",\"w\") as f:\n", + " f.write(myenv.serialize_to_string())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that the following command can take a few minutes. \n", + "\n", + "You can add tags and descriptions to images. Also, an image can contain multiple models."
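To illustrate that last point, here is a minimal sketch of packaging more than one model into a single image. The names `model_a` and `model_b` are hypothetical; the sketch assumes both were registered earlier with `Model.register`, and that `score.py` resolves each one at runtime by its registered name via `Model.get_model_path`:

```python
# Hedged sketch: one image carrying two registered models.
# `model_a` and `model_b` are assumed to be Model objects returned by Model.register.
from azureml.core.image import ContainerImage, Image

image_config = ContainerImage.image_configuration(runtime = "python",
                                                  execution_script = "score.py",
                                                  conda_file = "myenv.yml")

multi_model_image = Image.create(name = "mymultimodelimage",
                                 # every model in this list is packaged into the image
                                 models = [model_a, model_b],
                                 image_config = image_config,
                                 workspace = ws)
multi_model_image.wait_for_creation(show_output = True)
```

Each model listed in `models` is packaged with the image and can then be resolved inside the container by its registered name.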
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create image" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.image import Image, ContainerImage\n", + "\n", + "image_config = ContainerImage.image_configuration(runtime= \"python\",\n", + " execution_script=\"score.py\",\n", + " conda_file=\"myenv.yml\",\n", + " tags = {'area': \"diabetes\", 'type': \"regression\"},\n", + " description = \"Image with ridge regression model\")\n", + "\n", + "image = Image.create(name = \"myimage1\",\n", + " # this is the model object \n", + " models = [model],\n", + " image_config = image_config, \n", + " workspace = ws)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create image" + ] + }, + "outputs": [], + "source": [ + "image.wait_for_creation(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "List images by tag and view the detailed build log for debugging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create image" + ] + }, + "outputs": [], + "source": [ + "for i in Image.list(workspace = ws,tags = [\"area\"]):\n", + " print('{}(v.{} [{}]) stored at {} with build log {}'.format(i.name, i.version, i.creation_state, i.image_location, i.image_build_log_uri))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy image as web service on Azure Container Instance\n", + "\n", + "Note that the service creation can take a few minutes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.webservice import AciWebservice\n", + "\n", + "aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n", + " memory_gb = 1, \n", + " tags = {'area': \"diabetes\", 'type': \"regression\"}, \n", + " description = 'Predict diabetes using regression model')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.webservice import Webservice\n", + "\n", + "aci_service_name = 'my-aci-service-2'\n", + "print(aci_service_name)\n", + "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", + " image = image,\n", + " name = aci_service_name,\n", + " workspace = ws)\n", + "aci_service.wait_for_deployment(True)\n", + "print(aci_service.state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test web service" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Call the web service with some dummy input data to get a prediction."
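Besides the SDK helper shown in the next cell, the deployed service can also be called over plain HTTP. A minimal sketch, assuming the `requests` package is installed and the service is running; `scoring_uri` is the REST endpoint attribute of the deployed `Webservice` object:

```python
# Hedged sketch: invoking the ACI service via its REST endpoint instead of aci_service.run().
import json
import requests

headers = {'Content-Type': 'application/json'}
payload = json.dumps({'data': [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]})

# aci_service.scoring_uri points at the deployed scoring endpoint
resp = requests.post(aci_service.scoring_uri, data = payload, headers = headers)
print(resp.json())
```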
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "test_sample = json.dumps({'data': [\n", + " [1,2,3,4,5,6,7,8,9,10], \n", + " [10,9,8,7,6,5,4,3,2,1]\n", + "]})\n", + "test_sample = bytes(test_sample,encoding = 'utf8')\n", + "\n", + "prediction = aci_service.run(input_data = test_sample)\n", + "print(prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete ACI to clean up" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "deploy service", + "aci" + ] + }, + "outputs": [], + "source": [ + "aci_service.delete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/10.register-model-create-image-deploy-service/sklearn_regression_model.pkl b/00.Getting Started/10.register-model-create-image-deploy-service/sklearn_regression_model.pkl new file mode 100644 index 000000000..d10309b6c Binary files /dev/null and b/00.Getting Started/10.register-model-create-image-deploy-service/sklearn_regression_model.pkl differ diff --git a/00.Getting Started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb b/00.Getting Started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb new file mode 100644 index 000000000..bc300341f --- /dev/null +++ b/00.Getting Started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb @@ -0,0 +1,335 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deploying a web service to Azure Kubernetes Service (AKS)\n", + "This notebook shows the steps for deploying a service: registering a model, creating an image, provisioning a cluster (a one-time action), and deploying a service to it. \n", + "We then test and delete the service, image and model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "from azureml.core.compute import AksCompute, ComputeTarget\n", + "from azureml.core.webservice import Webservice, AksWebservice\n", + "from azureml.core.image import Image\n", + "from azureml.core.model import Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import azureml.core\n", + "print(azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Get workspace\n", + "Load the existing workspace from the persisted config file."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Register the model\n", + "Register an existing trained model, add description and tags." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Register the model\n", + "from azureml.core.model import Model\n", + "model = Model.register(model_path = \"sklearn_regression_model.pkl\", # this points to a local file\n", + " model_name = \"sklearn_regression_model.pkl\", # this is the name the model is registered as\n", + " tags = {'area': \"diabetes\", 'type': \"regression\"},\n", + " description = \"Ridge regression model to predict diabetes\",\n", + " workspace = ws)\n", + "\n", + "print(model.name, model.description, model.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create an image\n", + "Create an image using the registered model and the script that will load and run the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile score.py\n", + "import pickle\n", + "import json\n", + "import numpy\n", + "from sklearn.externals import joblib\n", + "from sklearn.linear_model import Ridge\n", + "from azureml.core.model import Model\n", + "\n", + "def init():\n", + " global model\n", + " # note here \"sklearn_regression_model.pkl\" is the name of the model registered under the workspace\n", + " # this is a different behavior than before when the code is run locally, even though the code is the same.\n", + " model_path = Model.get_model_path('sklearn_regression_model.pkl')\n", + " # deserialize the model file back into a sklearn model\n", + " model = joblib.load(model_path)\n", + "\n", + "# note you can pass in multiple rows for scoring\n", + "def run(raw_data):\n", + " try:\n", + " data = json.loads(raw_data)['data']\n", + " data = numpy.array(data)\n", + " result = model.predict(data)\n", + " return json.dumps({\"result\": result.tolist()})\n", + " except Exception as e:\n", + " # return the error text so a bad request does not crash the service\n", + " return json.dumps({\"error\": str(e)})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.conda_dependencies import CondaDependencies \n", + "\n", + "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n", + "\n", + "with open(\"myenv.yml\",\"w\") as f:\n", + " f.write(myenv.serialize_to_string())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.image import ContainerImage\n", + "\n", + "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", + " runtime = \"python\",\n", + " conda_file = \"myenv.yml\",\n", + " description = \"Image with ridge regression model\",\n", + " tags = {'area': \"diabetes\", 'type': \"regression\"}\n", + " )\n", + "\n", + "image = ContainerImage.create(name = \"myimage1\",\n", + " # this is the model object\n", + " models = [model],\n", + " image_config = image_config,\n", + " workspace = ws)\n", + "\n", + "image.wait_for_creation(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Provision the AKS Cluster\n", + "This is a one-time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete the cluster or the resource group that contains it, then you would have to recreate it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use the default configuration (can also provide parameters to customize)\n", + "prov_config = AksCompute.provisioning_configuration()\n", + "\n", + "aks_name = 'my-aks-9' \n", + "# Create the cluster\n", + "aks_target = ComputeTarget.create(workspace = ws, \n", + " name = aks_name, \n", + " provisioning_configuration = prov_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "aks_target.wait_for_completion(show_output = True)\n", + "print(aks_target.provisioning_state)\n", + "print(aks_target.provisioning_errors)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Optional step: Attach existing AKS cluster\n", + "\n", + "If you have an existing AKS cluster in your Azure subscription, you can attach it to the Workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "'''\n", + "# Use the default configuration (can also provide parameters to customize)\n", + "resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n", + "\n", + "create_name='my-existing-aks' \n", + "# Create the cluster\n", + "aks_target = AksCompute.attach(workspace=ws, name=create_name, resource_id=resource_id)\n", + "# Wait for the operation to complete\n", + "aks_target.wait_for_completion(True)\n", + "'''" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deploy web service to AKS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Set the web service configuration (using default here)\n", + "aks_config = AksWebservice.deploy_configuration()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "aks_service_name ='aks-service-1'\n", + "\n", + "aks_service = Webservice.deploy_from_image(workspace = ws, \n", + " name = aks_service_name,\n", + " image = image,\n", + " deployment_config = aks_config,\n", + " deployment_target = aks_target)\n", + "aks_service.wait_for_deployment(show_output = True)\n", + "print(aks_service.state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test the web service\n", + "We test the web service by passing it data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import json\n", + "\n", + "test_sample = json.dumps({'data': [\n", + " [1,2,3,4,5,6,7,8,9,10], \n", + " [10,9,8,7,6,5,4,3,2,1]\n", + "]})\n", + "test_sample = bytes(test_sample,encoding = 'utf8')\n", + "\n", + "prediction = aks_service.run(input_data = test_sample)\n", + "print(prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Clean up\n", + "Delete the service, image and model."
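One aside before the cleanup code: the AKS deployment above uses `AksWebservice.deploy_configuration()` with all defaults. A hedged sketch of a non-default configuration follows; the sizing and autoscaling parameter names are assumptions about the SDK, not taken from this notebook, so check the `AksWebservice.deploy_configuration` reference before relying on them:

```python
# Hedged sketch: an explicitly sized, autoscaling AKS web service configuration.
# Parameter names below are assumed, not confirmed by this notebook.
from azureml.core.webservice import AksWebservice

aks_config = AksWebservice.deploy_configuration(cpu_cores = 1,
                                                memory_gb = 2,
                                                autoscale_enabled = True,
                                                autoscale_min_replicas = 1,
                                                autoscale_max_replicas = 4)
```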
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "aks_service.delete()\n", + "image.delete()\n", + "model.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/11.production-deploy-to-aks/sklearn_regression_model.pkl b/00.Getting Started/11.production-deploy-to-aks/sklearn_regression_model.pkl new file mode 100644 index 000000000..d10309b6c Binary files /dev/null and b/00.Getting Started/11.production-deploy-to-aks/sklearn_regression_model.pkl differ diff --git a/00.Getting Started/12.enable-data-collection-for-models-in-aks/12.enable-data-collection-for-models-in-aks.ipynb b/00.Getting Started/12.enable-data-collection-for-models-in-aks/12.enable-data-collection-for-models-in-aks.ipynb new file mode 100644 index 000000000..507a6a042 --- /dev/null +++ b/00.Getting Started/12.enable-data-collection-for-models-in-aks/12.enable-data-collection-for-models-in-aks.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Enabling Data Collection for Models in Production\n", + "With this notebook, you can learn how to collect input model data from your Azure Machine Learning service into Azure Blob storage. Once enabled, the collected data gives you the opportunity to:\n", + "\n", + "* Monitor data drift as production data enters your model\n", + "* Make better decisions on when to retrain or optimize your model\n", + "* Retrain your model with the data collected\n", + "\n", + "## What data is collected?\n", + "* Model input data (voice, images, and video are not supported) from services deployed in Azure Kubernetes Cluster (AKS)\n", + "* Model predictions using production input data.\n", + "\n", + "**Note:** pre-aggregation or pre-calculations on this data are done by the user and are not included in this version of the product.\n", + "\n", + "## What is different compared to the standard production deployment process?\n", + "1. Update the scoring file.\n", + "2. Update the yml file with the new dependency.\n", + "3. Update the AKS configuration.\n", + "4. Build the new image and deploy it. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Import your dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace, Run\n", + "from azureml.core.compute import AksCompute, ComputeTarget\n", + "from azureml.core.webservice import Webservice, AksWebservice\n", + "from azureml.core.image import Image\n", + "from azureml.core.model import Model\n", + "\n", + "import azureml.core\n", + "print(azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Set up your configuration and create a workspace\n", + "Follow Notebook 00 instructions to do this.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Register Model\n", + "Register an existing trained model, add descirption and tags." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Register the model\n", + "from azureml.core.model import Model\n", + "model = Model.register(model_path = 'sklearn_regression_model.pkl', # this points to a local file\n", + " model_name = \"best_model\", # this is the name the model is registered as\n", + " tags = {'area': \"diabetes\", 'type': \"regression\"},\n", + " description = \"Ridge regression model to predict diabetes\",\n", + " workspace = ws)\n", + "\n", + "print(model.name, model.description, model.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Update your scoring file with Data Collection\n", + "The file below, compared to the file used in notebook 11, has the following changes:\n", + "### a. Import the module\n", + " from azureml.monitoring import ModelDataCollector \n", + "### b. In your init function add:\n", + " global inputs_dc, prediction_d\n", + " inputs_dc = ModelDataCollector(\"best_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\", \"feat3\". \"feat4\", \"feat5\", \"Feat6\"])\n", + " prediction_dc = ModelDataCollector(\"best_model\", identifier=\"predictions\", feature_names=[\"prediction1\", \"prediction2\"])\n", + " \n", + "* Identifier: Identifier is later used for building the folder structure in your Blob, it can be used to divide “raw” data versus “processed”.\n", + "* CorrelationId: is an optional parameter, you do not need to set it up if your model doesn’t require it. Having a correlationId in place does help you for easier mapping with other data. (Examples include: LoanNumber, CustomerId, etc.)\n", + "* Feature Names: These need to be set up in the order of your features in order for them to have column names when the .csv is created.\n", + "\n", + "### c. 
 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### c. In your run function add:\n", + "    inputs_dc.collect(data)\n", + "    prediction_dc.collect(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile score.py\n", + "import pickle\n", + "import json\n", + "import numpy as np\n", + "from sklearn.externals import joblib\n", + "from sklearn.linear_model import Ridge\n", + "from azureml.core.model import Model\n", + "from azureml.monitoring import ModelDataCollector\n", + "import time\n", + "\n", + "def init():\n", + "    global model\n", + "    #print (\"model initialized\" + time.strftime(\"%H:%M:%S\"))\n", + "    # note here \"best_model\" is the name of the model registered under the workspace\n", + "    # this call should return the path to the model.pkl file on the local disk.\n", + "    model_path = Model.get_model_path(model_name = 'best_model')\n", + "    # deserialize the model file back into a sklearn model\n", + "    model = joblib.load(model_path)\n", + "    global inputs_dc, prediction_dc\n", + "    # this setup will help us save our inputs under the \"inputs\" path in our Azure Blob\n", + "    inputs_dc = ModelDataCollector(model_name=\"best_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\", \"feat3\", \"feat4\", \"feat5\", \"feat6\"])\n", + "    # this setup will help us save our predictions under the \"predictions\" path in our Azure Blob\n", + "    prediction_dc = ModelDataCollector(model_name=\"best_model\", identifier=\"predictions\", feature_names=[\"prediction1\", \"prediction2\"])\n", + "\n", + "# note you can pass in multiple rows for scoring\n", + "def run(raw_data):\n", + "    global inputs_dc, prediction_dc\n", + "    try:\n", + "        data = json.loads(raw_data)['data']\n", + "        data = np.array(data)\n", + "        result = model.predict(data)\n", + "        inputs_dc.collect(data) # this call saves our input data to our blob\n", + "        prediction_dc.collect(result) # this call saves our prediction data to our blob\n", + "        return json.dumps({\"result\": result.tolist()})\n", + "    except Exception as e:\n", + "        result = str(e)\n", + "        #print (result + time.strftime(\"%H:%M:%S\"))\n", + "        return json.dumps({\"error\": result})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Update your myenv.yml file with the required module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile myenv.yml\n", + "name: myenv\n", + "channels:\n", + "  - defaults\n", + "dependencies:\n", + "  - pip:\n", + "    - numpy\n", + "    - scikit-learn\n", + "    # Required packages for AzureML execution, history, and data preparation.\n", + "    - --extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\n", + "    - azureml-core\n", + "    - azureml-monitoring" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. 
Create your new Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.image import ContainerImage\n", + "\n", + "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", + "                                                  runtime = \"python\",\n", + "                                                  conda_file = \"myenv.yml\",\n", + "                                                  description = \"Image with ridge regression model\",\n", + "                                                  tags = {'area': \"diabetes\", 'type': \"regression\"}\n", + "                                                  )\n", + "\n", + "image = ContainerImage.create(name = \"myimage1\",\n", + "                              # this is the model object\n", + "                              models = [model],\n", + "                              image_config = image_config,\n", + "                              workspace = ws)\n", + "\n", + "image.wait_for_creation(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(model.name, model.description, model.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Deploy to AKS service\n", + "For this step you need to have an AKS cluster set up (see Notebook 11).\n", + "In this case we attach to a previously created cluster." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# Replace resource_id with the resource id of your own AKS cluster\n", + "resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/marthateresource_groupjw/providers/Microsoft.ContainerService/managedClusters/my-aks-colfb348092fd3a760'\n", + "create_name = 'my-existing-aks'\n", + "aks_target = AksCompute.attach(workspace = ws, \n", + "                               name = create_name, \n", + "                               resource_id=resource_id)\n", + "# Wait for the operation to complete\n", + "aks_target.wait_for_completion(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### a. Activate Data Collection and App Insights\n", + "In order to enable Data Collection and App Insights in your service, you will need to update your AKS deployment configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the web service configuration (using defaults here)\n", + "aks_config = AksWebservice.deploy_configuration(collect_model_data=True, enable_app_insights=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### b. Deploy your service" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "aks_service_name = 'aks-w-collv5'\n", + "\n", + "aks_service = Webservice.deploy_from_image(workspace = ws, \n", + "                                           name = aks_service_name,\n", + "                                           image = image,\n", + "                                           deployment_config = aks_config,\n", + "                                           deployment_target = aks_target\n", + "                                           )\n", + "aks_service.wait_for_deployment(show_output = True)\n", + "print(aks_service.state)\n" + ] + },
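 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before sending traffic to the service, it can be handy to look up its scoring endpoint and authentication keys. This cell is an optional addition to the original walkthrough; it only uses the standard `Webservice` accessors `scoring_uri` and `get_keys()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Optional: retrieve the scoring endpoint and auth keys of the deployed service.\n", + "# scoring_uri and get_keys() are standard Webservice members.\n", + "print('Scoring endpoint:', aks_service.scoring_uri)\n", + "primary_key, secondary_key = aks_service.get_keys()\n", + "print('Primary key:', primary_key)" + ] + },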
 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Test your service and send some data\n", + "**Note**: It will take around 15 minutes for your data to appear in your blob.\n", + "The data will appear in your Azure Blob following this format:\n", + "\n", + "/modeldata/subscriptionid/resourcegroupname/workspacename/webservicename/modelname/modelversion/identifier/year/month/day/data.csv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "import json\n", + "\n", + "test_sample = json.dumps({'data': [\n", + "    [1,2,3,4,54,6,7,8,88,10], \n", + "    [10,9,8,37,36,45,4,33,2,1]\n", + "]})\n", + "test_sample = bytes(test_sample, encoding = 'utf8')\n", + "\n", + "prediction = aks_service.run(input_data = test_sample)\n", + "print(prediction)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_sample = json.dumps({'data': [\n", + "    [1,22,3,4,5,68,7,98,95,310], \n", + "    [10,92,8,7,6,53,84,23,323,1]\n", + "]})\n", + "test_sample = bytes(test_sample, encoding = 'utf8')\n", + "\n", + "prediction = aks_service.run(input_data = test_sample)\n", + "print(prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9. Validate your data and analyze it\n", + "You can look into your data following this path format in your Azure Blob:\n", + "\n", + "/modeldata/**\\<subscriptionid\\>**/**\\<resourcegroupname\\>**/**\\<workspacename\\>**/**\\<webservicename\\>**/**\\<modelname\\>**/**\\<modelversion\\>**/**\\<identifier\\>**/*year/month/day*/data.csv\n", + "\n", + "For further analysis you have multiple options:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### a. Create a Databricks cluster and connect it to your blob\n", + "See https://docs.microsoft.com/en-us/azure/azure-databricks/quickstart-create-databricks-workspace-portal or, in your Databricks workspace, look for the template \"Azure Blob Storage Import Example Notebook\".\n", + "\n", + "Here is an example of setting up the file location to extract the relevant data (`*` wildcards over month and day):\n", + "\n", + "    file_location = \"wasbs://mycontainer@testmartstoragendbblgwy.blob.core.windows.net/unknown/unknown/unknown-bigdataset-unknown/my_iterate_parking_inputs/2018/*/*/data.csv\"\n", + "    file_type = \"csv\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### b. Connect Blob to Power BI (small data only)\n", + "1. Download and open Power BI Desktop.\n", + "2. Select “Get Data” and click on “Azure Blob Storage” >> Connect.\n", + "3. Add your storage account and enter your storage key.\n", + "4. Select the container where your data collection is stored and click on Edit.\n", + "5. In the query editor, click under the “Name” column and add your storage account model path into the filter. Note: if you want to only look into files from a specific year or month, just expand the filter path. For example, to look only into March data: /modeldata/\\<subscriptionid\\>/\\<resourcegroupname\\>/\\<workspacename\\>/\\<webservicename\\>/\\<modelname\\>/\\<modelversion\\>/\\<identifier\\>/\\<year\\>/3\n", + "6. Click on the double arrow beside the “Content” column to combine the files.\n", + "7. Click OK and the data will preload.\n", + "8. You can now click Close and Apply, and start building your custom reports on your model input data."
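 + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### c. Download the collected .csv files with the SDK\n", + "As a third option (an addition to the original walkthrough), you can register the blob container that the collected data lands in as a datastore and pull the files down locally. The storage account name and key placeholders below are assumptions - replace them with the values for the storage account attached to your workspace; the container name `modeldata` follows the path format shown above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.datastore import Datastore\n", + "\n", + "# Assumed placeholders: fill in your workspace storage account's name and key.\n", + "storage_account_name = '<my-storage-account>'\n", + "storage_account_key = '<my-storage-account-key>'\n", + "\n", + "# Register the container that collected data lands in as a datastore.\n", + "modeldata_ds = Datastore.register_azure_blob_container(ws, 'modeldata', 'modeldata',\n", + "                                                       account_name=storage_account_name,\n", + "                                                       account_key=storage_account_key,\n", + "                                                       overwrite=True)\n", + "\n", + "# Download everything collected so far; use prefix= to narrow to one service or day.\n", + "modeldata_ds.download(target_path='./collected_data', overwrite=True)"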
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Disable Data Collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "aks_service.update(collect_model_data=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/00.Getting Started/12.enable-data-collection-for-models-in-aks/sklearn_regression_model.pkl b/00.Getting Started/12.enable-data-collection-for-models-in-aks/sklearn_regression_model.pkl new file mode 100644 index 000000000..d10309b6c Binary files /dev/null and b/00.Getting Started/12.enable-data-collection-for-models-in-aks/sklearn_regression_model.pkl differ diff --git a/pipeline/06.pipeline-batch-scoring.ipynb b/pipeline/06.pipeline-batch-scoring.ipynb new file mode 100644 index 000000000..dae220339 --- /dev/null +++ b/pipeline/06.pipeline-batch-scoring.ipynb @@ -0,0 +1,529 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from azureml.core import Workspace, Run, Experiment\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')\n", + "\n", + "# Also create a Project and attach to Workspace\n", + "project_folder = \"sample_projects\"\n", + "run_history_name = project_folder\n", + "\n", + "if not os.path.isdir(project_folder):\n", + " os.mkdir(project_folder)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute, ComputeTarget\n", + "from azureml.core.datastore import Datastore\n", + "from azureml.data.data_reference import DataReference\n", + "from azureml.pipeline.core import Pipeline, PipelineData\n", + "from azureml.pipeline.steps import PythonScriptStep\n", + "from azureml.core.runconfig import CondaDependencies, RunConfiguration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create and attach Compute targets\n", + "Use the below code to create and attach Compute targets. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Batch AI compute\n", + "cluster_name = \"gpu_cluster\"\n", + "try:\n", + " cluster = BatchAiCompute(ws, cluster_name)\n", + " print(\"found existing cluster.\")\n", + "except:\n", + " print(\"creating new cluster\")\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n", + " autoscale_enabled = True,\n", + " cluster_min_nodes = 0, \n", + " cluster_max_nodes = 1)\n", + "\n", + " # create the cluster\n", + " cluster = ComputeTarget.create(ws, cluster_name, provisioning_config)\n", + " cluster.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python scripts to run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Python scripts that run the batch scoring. `batchai_score.py` takes input images in `dataset_path`, pretrained models in `model_dir` and outputs a `results-label.txt` to `output_dir`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $project_folder/batchai_score.py\n", + "import os\n", + "import argparse\n", + "import datetime,time\n", + "import tensorflow as tf\n", + "from math import ceil\n", + "import numpy as np\n", + "import shutil\n", + "from tensorflow.contrib.slim.python.slim.nets import inception_v3\n", + "\n", + "slim = tf.contrib.slim\n", + "\n", + "parser = argparse.ArgumentParser(description=\"Start a tensorflow model serving\")\n", + "parser.add_argument('--model_dir', dest=\"model_dir\", required=True)\n", + "parser.add_argument('--dataset_path', dest=\"dataset_path\", required=True)\n", + "parser.add_argument('--output_dir', dest=\"output_dir\", required=True)\n", + "parser.add_argument('--batch_size', dest=\"batch_size\", type=int, required=True)\n", + "\n", + "args = parser.parse_args()\n", + "\n", + "image_size = 299\n", + "num_channel = 3\n", + "\n", + "# create output directory if it does not exist\n", + "os.makedirs(args.output_dir, exist_ok=True)\n", + "\n", + "def get_class_label_dict(label_file):\n", + " label = []\n", + " proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()\n", + " for l in proto_as_ascii_lines:\n", + " label.append(l.rstrip())\n", + " return label\n", + "\n", + "\n", + "class DataIterator:\n", + " def __init__(self, data_dir):\n", + " self.file_paths = []\n", + " image_list = os.listdir(data_dir)\n", + " total_size = len(image_list)\n", + " self.file_paths = [data_dir + '/' + file_name.rstrip() for file_name in image_list ]\n", + "\n", + " self.labels = [1 for file_name in self.file_paths]\n", + "\n", + " @property\n", + " def size(self):\n", + " return len(self.labels)\n", + "\n", + " def input_pipeline(self, batch_size):\n", + " images_tensor = tf.convert_to_tensor(self.file_paths, dtype=tf.string)\n", + " labels_tensor = tf.convert_to_tensor(self.labels, dtype=tf.int64)\n", + " input_queue = tf.train.slice_input_producer([images_tensor, labels_tensor], shuffle=False)\n", + " labels = input_queue[1]\n", + " images_content = tf.read_file(input_queue[0])\n", + "\n", + " image_reader = tf.image.decode_jpeg(images_content, channels=num_channel, name=\"jpeg_reader\")\n", + " float_caster = tf.cast(image_reader, tf.float32)\n", + " new_size = tf.constant([image_size, image_size], dtype=tf.int32)\n", + " images = tf.image.resize_images(float_caster, new_size)\n", + " images = tf.divide(tf.subtract(images, 
[0]), [255])\n", + "\n", + " image_batch, label_batch = tf.train.batch([images, labels], batch_size=batch_size, capacity=5 * batch_size)\n", + " return image_batch\n", + "\n", + "def main(_):\n", + " start_time = datetime.datetime.now()\n", + " label_file_name = os.path.join(args.model_dir, \"labels.txt\")\n", + " label_dict = get_class_label_dict(label_file_name)\n", + " classes_num = len(label_dict)\n", + " test_feeder = DataIterator(data_dir=args.dataset_path)\n", + " total_size = len(test_feeder.labels)\n", + " count = 0\n", + " with tf.Session() as sess:\n", + " test_images = test_feeder.input_pipeline(batch_size=args.batch_size)\n", + " with slim.arg_scope(inception_v3.inception_v3_arg_scope()):\n", + " input_images = tf.placeholder(tf.float32, [args.batch_size, image_size, image_size, num_channel])\n", + " logits, _ = inception_v3.inception_v3(input_images,\n", + " num_classes=classes_num,\n", + " is_training=False)\n", + " probabilities = tf.argmax(logits, 1)\n", + "\n", + " sess.run(tf.global_variables_initializer())\n", + " sess.run(tf.local_variables_initializer())\n", + " coord = tf.train.Coordinator()\n", + " threads = tf.train.start_queue_runners(sess=sess, coord=coord)\n", + " saver = tf.train.Saver()\n", + " model_path = os.path.join(args.model_dir, \"inception_v3.ckpt\")\n", + " saver.restore(sess, model_path)\n", + " out_filename = os.path.join(args.output_dir, \"result-labels.txt\")\n", + " with open(out_filename, \"w\") as result_file:\n", + " i = 0\n", + " while count < total_size and not coord.should_stop():\n", + " test_images_batch = sess.run(test_images)\n", + " file_names_batch = test_feeder.file_paths[i*args.batch_size: min(test_feeder.size, (i+1)*args.batch_size)]\n", + " results = sess.run(probabilities, feed_dict={input_images: test_images_batch})\n", + " new_add = min(args.batch_size, total_size-count)\n", + " count += new_add\n", + " i += 1\n", + " for j in range(new_add):\n", + " result_file.write(os.path.basename(file_names_batch[j]) + \": \" + label_dict[results[j]] + \"\\n\")\n", + " result_file.flush()\n", + " coord.request_stop()\n", + " coord.join(threads)\n", + " \n", + " # copy the file to artifacts\n", + " shutil.copy(out_filename, \"./outputs/\")\n", + " # Move the processed data out of the blob so that the next run can process the data.\n", + "\n", + "if __name__ == \"__main__\":\n", + " tf.app.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "account_name = \"pipelinedata\"\n", + "sample_data = Datastore.register_azure_blob_container(ws, \"sampledata\", \"sampledata\", \n", + " account_name=account_name, \n", + " overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Output datastore" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We write the outputs to the default datastore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "default_ds = \"workspaceblobstore\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Specify where the data is stored or will be written to" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.conda_dependencies import CondaDependencies\n", + "from azureml.data.data_reference import DataReference\n", + "from azureml.pipeline.core import Pipeline, PipelineData\n", + "from azureml.core import Datastore\n", + "from 
azureml.core import Experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "input_images = DataReference(datastore=sample_data, \n", + "                             data_reference_name=\"input_images\",\n", + "                             path_on_datastore=\"batchscoring/images\",\n", + "                             mode=\"download\"\n", + "                             )\n", + "model_dir = DataReference(datastore=sample_data, \n", + "                          data_reference_name=\"input_model\",\n", + "                          path_on_datastore=\"batchscoring/models\",\n", + "                          mode=\"download\" \n", + "                          )\n", + "output_dir = PipelineData(name=\"scores\", \n", + "                          datastore_name=default_ds, \n", + "                          output_path_on_compute=\"batchscoring/results\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Specify the environment to run the script" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd = CondaDependencies.create(pip_packages=[\"tensorflow-gpu==1.4.0\", \"azureml-defaults\"])\n", + "\n", + "# Run configuration\n", + "batchai_run_config = RunConfiguration(conda_dependencies=cd)\n", + "batchai_run_config.environment.docker.enabled = True\n", + "batchai_run_config.environment.docker.gpu_support = True\n", + "batchai_run_config.environment.docker.base_image = \"microsoft/mmlspark:gpu-0.12\"\n", + "batchai_run_config.environment.spark.precache_packages = False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Steps to run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "step = PythonScriptStep(\n", + "    name=\"batch ai scoring\",\n", + "    script_name=\"batchai_score.py\",\n", + "    arguments=[\"--dataset_path\", input_images, \"--model_dir\", model_dir, \"--output_dir\", output_dir, \"--batch_size\", 20],\n", + "    target=cluster,\n", + "    inputs=[input_images, model_dir],\n", + "    outputs=[output_dir],\n", + "    runconfig=batchai_run_config,\n", + "    source_directory=project_folder,\n", + "    allow_reuse=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline = Pipeline(workspace=ws, steps=[step])\n", + "pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Monitor run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(pipeline_run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "node_run = list(pipeline_run.get_children())[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "node_run.download_file(\"./outputs/result-labels.txt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Display a few results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv(\"result-labels.txt\", delimiter=\":\", header=None)\n", + "df.columns = [\"Filename\", \"Prediction\"]\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create template and rerun the pipeline using 
a REST call" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create template" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "template = pipeline_run.create_template(name=\"batch score\", description=\"scores images kept in container sampledata\",\n", + " version=\"1.0\")\n", + "template_id = template.template_id" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rerun using REST call" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get AAD token" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.authentication import AzureCliAuthentication\n", + "import requests\n", + "\n", + "cli_auth = AzureCliAuthentication()\n", + "aad_token = cli_auth.get_authentication_header()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hit the REST endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core import Template\n", + "\n", + "rest_endpoint = Template.get_template_endpoint(template_id, ws)\n", + "response = requests.post(rest_endpoint, headers=aad_token, json={})\n", + "run_id = response.json()[\"Id\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Monitor the template run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.pipeline.core.run import PipelineRun\n", + "template_run = PipelineRun(ws.experiments()[\"batch_scoring\"], run_id)\n", + "\n", + "RunDetails(template_run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/training/40.tensorboard/40.tensorboard.ipynb b/training/40.tensorboard/40.tensorboard.ipynb new file mode 100644 index 000000000..97b64db9f --- /dev/null +++ b/training/40.tensorboard/40.tensorboard.ipynb @@ -0,0 +1,502 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 40. Tensorboard Integration with Run History\n", + "\n", + "1. Run a Tensorflow job locally and view its TB output live.\n", + "2. The same, for a DSVM.\n", + "3. And once more, with Batch AI.\n", + "4. Finally, we'll collect all of these historical runs together into a single Tensorboard graph." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set experiment name and create project\n", + "Choose a name for your run history container in the workspace, and create a folder for the project." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from os import path, makedirs\n", + "experiment_name = 'tensorboard-demo'\n", + "\n", + "# experiment folder\n", + "exp_dir = './sample_projects/' + experiment_name\n", + "\n", + "if not path.exists(exp_dir):\n", + " makedirs(exp_dir)\n", + "\n", + "# runs we started in this session, for the finale\n", + "runs = []" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download Tensorflow Tensorboard demo code\n", + "\n", + "Tensorflow's repository has an MNIST demo with extensive Tensorboard instrumentation. We'll use it here for our purposes.\n", + "\n", + "Note that we don't need to make any code changes at all - the code works without modification from the Tensorflow repository." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import os\n", + "import tempfile\n", + "tf_code = requests.get(\"https://raw.githubusercontent.com/tensorflow/tensorflow/r1.8/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py\")\n", + "with open(os.path.join(exp_dir, \"mnist_with_summaries.py\"), \"w\") as file:\n", + " file.write(tf_code.text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure and run locally\n", + "\n", + "We'll start by running this locally. While it might not initially seem that useful to use this for a local run - why not just run TB against the files generated locally? - even in this case there is some value to using this feature. Your local run will be registered in the run history, and your Tensorboard logs will be uploaded to the artifact store associated with this run. Later, you'll be able to restore the logs from any run, regardless of where it happened.\n", + "\n", + "Note that for this run, you will need to install Tensorflow on your local machine by yourself. Further, the Tensorboard module (that is, the one included with Tensorflow) must be accessible to this notebook's kernel, as the local machine is what runs Tensorboard." 
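 + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As an optional sanity check (a small addition to the original notebook), you can confirm up front that this kernel can import TensorFlow and its bundled Tensorboard module before submitting anything:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Optional sanity check: the local run and the Tensorboard integration below\n", + "# both need these modules importable from this notebook's kernel.\n", + "import importlib.util\n", + "\n", + "for module_name in ('tensorflow', 'tensorboard'):\n", + "    found = importlib.util.find_spec(module_name) is not None\n", + "    print(module_name, 'is available' if found else 'is MISSING from this kernel')"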
 + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "\n", + "# Create a run configuration.\n", + "run_config = RunConfiguration()\n", + "run_config.environment.python.user_managed_dependencies = True\n", + "\n", + "# You can choose a specific Python environment by pointing to a Python path, for example:\n", + "#run_config.environment.python.interpreter_path = '/path/to/envs/myenv/bin/python'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment, Run\n", + "from azureml.core.script_run_config import ScriptRunConfig\n", + "import tensorflow as tf  # imported here so a missing TensorFlow fails fast, before we submit\n", + "\n", + "logs_dir = os.curdir + os.sep + \"logs\"\n", + "tensorflow_logs_dir = os.path.join(logs_dir, \"tensorflow\")\n", + "\n", + "if not path.exists(tensorflow_logs_dir):\n", + "    makedirs(tensorflow_logs_dir)\n", + "\n", + "os.environ[\"TEST_TMPDIR\"] = logs_dir\n", + "\n", + "# Writing logs to ./logs results in their being uploaded to the Artifact Service,\n", + "# and thus, made accessible to our Tensorboard instance.\n", + "arguments_list = [\"--log_dir\", logs_dir]\n", + "\n", + "# If you would like the run to go for longer, add --max_steps 5000 to the arguments list:\n", + "# arguments_list += [\"--max_steps\", \"5000\"]\n", + "\n", + "# Create an experiment\n", + "exp = Experiment(ws, experiment_name)\n", + "\n", + "script = ScriptRunConfig(exp_dir,\n", + "                         script=\"mnist_with_summaries.py\",\n", + "                         arguments=arguments_list,\n", + "                         run_config=run_config)\n", + "\n", + "run = exp.submit(script)\n", + "# You can also wait for the run to complete\n", + "# run.wait_for_completion(show_output=True)\n", + "runs.append(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start Tensorboard\n", + "\n", + "Now, while the run is in progress, we just need to start Tensorboard with the run as its target, and it will begin streaming logs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.tensorboard import Tensorboard\n", + "\n", + "# The Tensorboard constructor takes an array of runs, so be sure to pass it in as a single-element array here\n", + "tb = Tensorboard([run])\n", + "\n", + "# If successful, start() returns a string with the URI of the instance.\n", + "tb.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stop Tensorboard\n", + "\n", + "When you're done, make sure to call the `stop()` method of the Tensorboard object, or it will stay running even after your job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tb.stop()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Now, with a DSVM\n", + "\n", + "Tensorboard uploading works with all compute targets. Here we demonstrate it from a DSVM.\n", + "Note that the Tensorboard instance itself will be run by the notebook kernel. Again, this means this notebook's kernel must have access to the Tensorboard module.\n", + "\n", + "If you are unfamiliar with DSVM configuration, check [04. Train in a remote VM (Ubuntu DSVM)](04.train-on-remote-vm.ipynb) for a more detailed breakdown."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import DsvmCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "compute_target_name = 'cpu-dsvm'\n", + "\n", + "try:\n", + " compute_target = DsvmCompute(workspace = ws, name = compute_target_name)\n", + " print('found existing:', compute_target.name)\n", + "except ComputeTargetException:\n", + " print('creating new.')\n", + " dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n", + " compute_target = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n", + " compute_target.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit run using TensorFlow estimator\n", + "\n", + "Instead of manually configuring the DSVM environment, we can use the TensorFlow estimator and everything is set up automatically." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "\n", + "script_params = {\"--log_dir\": \"./logs\"}\n", + "\n", + "# If you want the run to go longer, set --max-steps to a higher number.\n", + "# script_params[\"--max_steps\"] = \"5000\"\n", + "\n", + "tf_estimator = TensorFlow(source_directory=exp_dir,\n", + " compute_target=compute_target,\n", + " entry_script='mnist_with_summaries.py',\n", + " script_params=script_params)\n", + "\n", + "run = exp.submit(tf_estimator)\n", + "\n", + "runs.append(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start Tensorboard with this run\n", + "\n", + "Just like before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The Tensorboard constructor takes an array of runs, so be sure and pass it in as a single-element array here\n", + "tb = Tensorboard([run])\n", + "\n", + "# If successful, start() returns a string with the URI of the instance.\n", + "tb.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stop Tensorboard\n", + "\n", + "When you're done, make sure to call the `stop()` method of the Tensorboard object, or it will stay running even after your job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tb.stop()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Once more, with a Batch AI cluster\n", + "\n", + "Just to prove we can, let's create a Batch AI cluster using MLC, and run our demo there, as well." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "\n", + "clust_name = ws.name + \"cpu\"\n", + "\n", + "try:\n", + " # If you already have a cluster named this, we don't need to make a new one.\n", + " compute_target = [ct for ct in ws.compute_targets() if ct.name == clust_name and ct.type == 'BatchAI'][0]\n", + "except:\n", + " # Let's make a new one here.\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(cluster_max_nodes=2, \n", + " autoscale_enabled=True, \n", + " cluster_min_nodes=1,\n", + " vm_size='Standard_D11_V2')\n", + " \n", + " compute_target = BatchAiCompute.create(ws, clust_name, provisioning_config)\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=1, timeout_in_minutes=20)\n", + "print(compute_target.name)\n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + " # print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit run using TensorFlow estimator\n", + "\n", + "Again, we can use the TensorFlow estimator and everything is set up automatically." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "script_params = {\"--log_dir\": \"./logs\"}\n", + "\n", + "# If you want the run to go longer, set --max-steps to a higher number.\n", + "# script_params[\"--max_steps\"] = \"5000\"\n", + "\n", + "tf_estimator = TensorFlow(source_directory=exp_dir,\n", + " compute_target=compute_target,\n", + " entry_script='mnist_with_summaries.py',\n", + " script_params=script_params)\n", + "\n", + "run = exp.submit(tf_estimator)\n", + "\n", + "runs.append(run)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start Tensorboard with this run\n", + "\n", + "Once more..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The Tensorboard constructor takes an array of runs, so be sure and pass it in as a single-element array here\n", + "tb = Tensorboard([run])\n", + "\n", + "# If successful, start() returns a string with the URI of the instance.\n", + "tb.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stop Tensorboard\n", + "\n", + "When you're done, make sure to call the `stop()` method of the Tensorboard object, or it will stay running even after your job completes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tb.stop()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finale\n", + "\n", + "If you've paid close attention, you'll have noticed that we've been saving the run objects in an array as we went along. We can start a Tensorboard instance that combines all of these run objects into a single process. This way, you can compare historical runs. You can even do this with live runs; if you made some of those previous runs longer via the `--max_steps` parameter, they might still be running, and you'll see them live in this instance as well." 
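 + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a small aid (an addition to the original notebook), you can first list the runs we have accumulated; `id` and `get_status()` are standard `Run` accessors:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Glance at the runs collected during this session before combining them.\n", + "for r in runs:\n", + "    print(r.id, r.get_status())"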
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The Tensorboard constructor takes an array of runs...\n", + "# and it turns out that we have been building one of those all along.\n", + "tb = Tensorboard(runs)\n", + "\n", + "# If successful, start() returns a string with the URI of the instance.\n", + "tb.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stop Tensorboard\n", + "\n", + "As you might already know, make sure to call the `stop()` method of the Tensorboard object, or it will stay running (until you kill the kernel associated with this notebook, at least)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tb.stop()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/training/41.export-run-history-to-tensorboard/41.export-run-history-to-tensorboard.ipynb b/training/41.export-run-history-to-tensorboard/41.export-run-history-to-tensorboard.ipynb new file mode 100644 index 000000000..bac37e583 --- /dev/null +++ b/training/41.export-run-history-to-tensorboard/41.export-run-history-to-tensorboard.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 41. Export Run History as Tensorboard logs\n", + "\n", + "1. Run some training and log some metrics into Run History\n", + "2. Export the run history to some directory as Tensorboard logs\n", + "3. Launch a local Tensorboard to view the run history" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." 
 + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace, Run, Experiment\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + "      'Azure region: ' + ws.location, \n", + "      'Subscription id: ' + ws.subscription_id, \n", + "      'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set experiment name and start the run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'export-to-tensorboard'\n", + "exp = Experiment(ws, experiment_name)\n", + "root_run = exp.start_logging()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load diabetes dataset, a well-known built-in small dataset that comes with scikit-learn\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "X, y = load_diabetes(return_X_y=True)\n", + "\n", + "columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n", + "\n", + "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n", + "data = {\n", + "    \"train\":{\"x\":x_train, \"y\":y_train}, \n", + "    \"test\":{\"x\":x_test, \"y\":y_test}\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example experiment\n", + "from tqdm import tqdm\n", + "\n", + "alphas = [.1, .2, .3, .4, .5, .6, .7]\n", + "\n", + "# try a bunch of alpha values in a Linear Regression (Ridge) model\n", + "for alpha in tqdm(alphas):\n", + "    # create a bunch of child runs\n", + "    with root_run.child_run(\"alpha\" + str(alpha)) as run:\n", + "        # More data science stuff\n", + "        reg = Ridge(alpha=alpha)\n", + "        reg.fit(data[\"train\"][\"x\"], data[\"train\"][\"y\"])\n", + "        # TODO save model\n", + "        preds = reg.predict(data[\"test\"][\"x\"])\n", + "        mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", + "        # End train and eval\n", + "\n", + "        # log alpha and mean squared error on the parent run, so the export\n", + "        # below produces one series across the whole sweep\n", + "        root_run.log(\"alpha\", alpha)\n", + "        root_run.log(\"mse\", mse)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export Run History to Tensorboard logs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Export Run History to Tensorboard logs\n", + "from azureml.contrib.tensorboard.export import export_to_tensorboard\n", + "import os\n", + "import tensorflow as tf\n", + "\n", + "logdir = 'exportedTBlogs'\n", + "log_path = os.path.join(os.getcwd(), logdir)\n", + "os.makedirs(log_path, exist_ok=True)\n", + "print(logdir)\n", + "\n", + "# export run history for the project\n", + "export_to_tensorboard(root_run, logdir)\n", + "\n", + "# or export a particular run\n", + "# export_to_tensorboard(run, logdir)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "root_run.complete()" + ] + },
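 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a quick check (an addition to the original notebook), you can list the files that `export_to_tensorboard` just wrote before pointing Tensorboard at them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Walk the exported log directory and show what the export produced.\n", + "for dirpath, dirnames, filenames in os.walk(log_path):\n", + "    for filename in filenames:\n", + "        print(os.path.join(dirpath, filename))" + ] + },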
 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Start Tensorboard\n", + "\n", + "You can now start Tensorboard against the exported logs, either from this notebook or outside it, to view the result." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.tensorboard import Tensorboard\n", + "\n", + "# Here we pass an empty list of runs and point local_root at the exported logs instead\n", + "tb = Tensorboard([], local_root=logdir, port=6006)\n", + "\n", + "# If successful, start() returns a string with the URI of the instance.\n", + "tb.start()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stop Tensorboard\n", + "\n", + "When you're done, make sure to call the `stop()` method of the Tensorboard object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tb.stop()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/training/50.distributed-tensorflow-with-horovod/50.distributed-tensorflow-with-horovod.ipynb b/training/50.distributed-tensorflow-with-horovod/50.distributed-tensorflow-with-horovod.ipynb new file mode 100644 index 000000000..a53acd5b3 --- /dev/null +++ b/training/50.distributed-tensorflow-with-horovod/50.distributed-tensorflow-with-horovod.ipynb @@ -0,0 +1,500 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 50. Distributed TensorFlow with Horovod\n", + "\n", + "In this tutorial we demonstrate how to use the Azure ML Training SDK to train a TensorFlow model in a distributed manner using the Horovod framework.\n", + "\n", + "# Prerequisites\n", + "\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + "      'Azure region: ' + ws.location, \n", + "      'Subscription id: ' + ws.subscription_id, \n", + "      'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "from azureml.core.experiment import Experiment\n", + "\n", + "username = getpass.getuser().replace('-','')\n", + "\n", + "# choose a name for the run history container in the workspace\n", + "experiment = Experiment(ws, username + '-horovod')\n", + "\n", + "# project folder name\n", + "project_folder = './samples/distributed-tensorflow-horovod'\n", + "os.makedirs(project_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This recipe uses an MLC-managed Batch AI cluster. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "batchai_cluster_name='gpucluster'\n", + "\n", + "\n", + "try:\n", + " # Check for existing cluster\n", + " compute_target = ComputeTarget(ws,batchai_cluster_name)\n", + " print('Found existing compute target')\n", + "except:\n", + " # Else, create new one\n", + " print('Creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\", # NC6 is GPU-enabled\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = True,\n", + " cluster_min_nodes = 0, \n", + " cluster_max_nodes = 4)\n", + " compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + "\n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + "print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile {project_folder}/word2vec.py\n", + "\n", + "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n", + "# Modifications copyright (C) 2017 Uber Technologies, Inc.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================\n", + "\"\"\"Basic word2vec example.\"\"\"\n", + "\n", + "from __future__ import absolute_import\n", + "from __future__ import division\n", + "from __future__ import print_function\n", + "\n", + "import collections\n", + "import math\n", + "import os\n", + "import random\n", + "import zipfile\n", + "import argparse\n", + "\n", + "import numpy as np\n", + "from six.moves import urllib\n", + "from six.moves import xrange # pylint: disable=redefined-builtin\n", + "import tensorflow as tf\n", + "import horovod.tensorflow as hvd\n", + "from azureml.core.run import Run\n", + "\n", + "# Horovod: initialize Horovod.\n", + "hvd.init()\n", + "\n", + "parser = argparse.ArgumentParser()\n", + "parser.add_argument('--data_dir', type=str, help='input directory')\n", + "\n", + "args = parser.parse_args()\n", + "\n", + "data_dir = args.data_dir\n", + "print(\"the input data_dir is %s\" % data_dir)\n", + "\n", + "# Step 1: Download the data.\n", + "url = 'http://mattmahoney.net/dc/text8.zip'\n", + "\n", + "\n", + "def maybe_download(filename, expected_bytes):\n", + " \"\"\"Download a file if not present, and make sure it's the right size.\"\"\"\n", + " if not filename:\n", + " filename = \"text8.zip\"\n", + 
" if not os.path.exists(filename):\n", + " print(\"Downloading the data from http://mattmahoney.net/dc/text8.zip\")\n", + " filename, _ = urllib.request.urlretrieve(url, filename)\n", + " else:\n", + " print(\"Use the data from the input data_dir %s\" % data_dir)\n", + " statinfo = os.stat(filename)\n", + " if statinfo.st_size == expected_bytes:\n", + " print('Found and verified', filename)\n", + " else:\n", + " print(statinfo.st_size)\n", + " raise Exception(\n", + " 'Failed to verify ' + url + '. Can you get to it with a browser?')\n", + " return filename\n", + "\n", + "filename = maybe_download(data_dir, 31344016)\n", + "\n", + "\n", + "# Read the data into a list of strings.\n", + "def read_data(filename):\n", + " \"\"\"Extract the first file enclosed in a zip file as a list of words.\"\"\"\n", + " with zipfile.ZipFile(filename) as f:\n", + " data = tf.compat.as_str(f.read(f.namelist()[0])).split()\n", + " return data\n", + "\n", + "vocabulary = read_data(filename)\n", + "print('Data size', len(vocabulary))\n", + "\n", + "# Step 2: Build the dictionary and replace rare words with UNK token.\n", + "vocabulary_size = 50000\n", + "\n", + "\n", + "def build_dataset(words, n_words):\n", + " \"\"\"Process raw inputs into a dataset.\"\"\"\n", + " count = [['UNK', -1]]\n", + " count.extend(collections.Counter(words).most_common(n_words - 1))\n", + " dictionary = dict()\n", + " for word, _ in count:\n", + " dictionary[word] = len(dictionary)\n", + " data = list()\n", + " unk_count = 0\n", + " for word in words:\n", + " if word in dictionary:\n", + " index = dictionary[word]\n", + " else:\n", + " index = 0 # dictionary['UNK']\n", + " unk_count += 1\n", + " data.append(index)\n", + " count[0][1] = unk_count\n", + " reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))\n", + " return data, count, dictionary, reversed_dictionary\n", + "\n", + "data, count, dictionary, reverse_dictionary = build_dataset(vocabulary,\n", + " vocabulary_size)\n", + "del vocabulary # Hint to reduce memory.\n", + "print('Most common words (+UNK)', count[:5])\n", + "print('Sample data', data[:10], [reverse_dictionary[i] for i in data[:10]])\n", + "\n", + "\n", + "# Step 3: Function to generate a training batch for the skip-gram model.\n", + "def generate_batch(batch_size, num_skips, skip_window):\n", + " assert num_skips <= 2 * skip_window\n", + " # Adjust batch_size to match num_skips\n", + " batch_size = batch_size // num_skips * num_skips\n", + " span = 2 * skip_window + 1 # [ skip_window target skip_window ]\n", + " # Backtrack a little bit to avoid skipping words in the end of a batch\n", + " data_index = random.randint(0, len(data) - span - 1)\n", + " batch = np.ndarray(shape=(batch_size), dtype=np.int32)\n", + " labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)\n", + " buffer = collections.deque(maxlen=span)\n", + " for _ in range(span):\n", + " buffer.append(data[data_index])\n", + " data_index = (data_index + 1) % len(data)\n", + " for i in range(batch_size // num_skips):\n", + " target = skip_window # target label at the center of the buffer\n", + " targets_to_avoid = [skip_window]\n", + " for j in range(num_skips):\n", + " while target in targets_to_avoid:\n", + " target = random.randint(0, span - 1)\n", + " targets_to_avoid.append(target)\n", + " batch[i * num_skips + j] = buffer[skip_window]\n", + " labels[i * num_skips + j, 0] = buffer[target]\n", + " buffer.append(data[data_index])\n", + " data_index = (data_index + 1) % len(data)\n", + " return batch, labels\n", + "\n", + "batch, 
labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)\n", + "for i in range(8):\n", + " print(batch[i], reverse_dictionary[batch[i]],\n", + " '->', labels[i, 0], reverse_dictionary[labels[i, 0]])\n", + "\n", + "# Step 4: Build and train a skip-gram model.\n", + "\n", + "max_batch_size = 128\n", + "embedding_size = 128 # Dimension of the embedding vector.\n", + "skip_window = 1 # How many words to consider left and right.\n", + "num_skips = 2 # How many times to reuse an input to generate a label.\n", + "\n", + "# We pick a random validation set to sample nearest neighbors. Here we limit the\n", + "# validation samples to the words that have a low numeric ID, which by\n", + "# construction are also the most frequent.\n", + "valid_size = 16 # Random set of words to evaluate similarity on.\n", + "valid_window = 100 # Only pick dev samples in the head of the distribution.\n", + "valid_examples = np.random.choice(valid_window, valid_size, replace=False)\n", + "num_sampled = 64 # Number of negative examples to sample.\n", + "\n", + "graph = tf.Graph()\n", + "\n", + "with graph.as_default():\n", + "\n", + " # Input data.\n", + " train_inputs = tf.placeholder(tf.int32, shape=[None])\n", + " train_labels = tf.placeholder(tf.int32, shape=[None, 1])\n", + " valid_dataset = tf.constant(valid_examples, dtype=tf.int32)\n", + "\n", + " # Look up embeddings for inputs.\n", + " embeddings = tf.Variable(\n", + " tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))\n", + " embed = tf.nn.embedding_lookup(embeddings, train_inputs)\n", + "\n", + " # Construct the variables for the NCE loss\n", + " nce_weights = tf.Variable(\n", + " tf.truncated_normal([vocabulary_size, embedding_size],\n", + " stddev=1.0 / math.sqrt(embedding_size)))\n", + " nce_biases = tf.Variable(tf.zeros([vocabulary_size]))\n", + "\n", + " # Compute the average NCE loss for the batch.\n", + " # tf.nce_loss automatically draws a new sample of the negative labels each\n", + " # time we evaluate the loss.\n", + " loss = tf.reduce_mean(\n", + " tf.nn.nce_loss(weights=nce_weights,\n", + " biases=nce_biases,\n", + " labels=train_labels,\n", + " inputs=embed,\n", + " num_sampled=num_sampled,\n", + " num_classes=vocabulary_size))\n", + "\n", + " # Horovod: adjust learning rate based on number of GPUs.\n", + " optimizer = tf.train.GradientDescentOptimizer(1.0 * hvd.size())\n", + "\n", + " # Horovod: add Horovod Distributed Optimizer.\n", + " optimizer = hvd.DistributedOptimizer(optimizer)\n", + "\n", + " train_op = optimizer.minimize(loss)\n", + "\n", + " # Compute the cosine similarity between minibatch examples and all embeddings.\n", + " norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))\n", + " normalized_embeddings = embeddings / norm\n", + " valid_embeddings = tf.nn.embedding_lookup(\n", + " normalized_embeddings, valid_dataset)\n", + " similarity = tf.matmul(\n", + " valid_embeddings, normalized_embeddings, transpose_b=True)\n", + "\n", + " # Add variable initializer.\n", + " init = tf.global_variables_initializer()\n", + "\n", + " # Horovod: broadcast initial variable states from rank 0 to all other processes.\n", + " # This is necessary to ensure consistent initialization of all workers when\n", + " # training is started with random weights or restored from a checkpoint.\n", + " bcast = hvd.broadcast_global_variables(0)\n", + "\n", + "# Step 5: Begin training.\n", + "\n", + "# Horovod: adjust number of steps based on number of GPUs.\n", + "num_steps = 4000 // hvd.size() + 1\n", + "\n", + "# 
Horovod: pin GPU to be used to process local rank (one GPU per process)\n", + "config = tf.ConfigProto()\n", + "config.gpu_options.allow_growth = True\n", + "config.gpu_options.visible_device_list = str(hvd.local_rank())\n", + "\n", + "with tf.Session(graph=graph, config=config) as session:\n", + " # We must initialize all variables before we use them.\n", + " init.run()\n", + " bcast.run()\n", + " print('Initialized')\n", + " run = Run.get_submitted_run()\n", + " average_loss = 0\n", + " for step in xrange(num_steps):\n", + " # simulate varying sentence lengths by randomizing the batch size\n", + " batch_size = random.randint(max_batch_size // 2, max_batch_size)\n", + " batch_inputs, batch_labels = generate_batch(\n", + " batch_size, num_skips, skip_window)\n", + " feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}\n", + "\n", + " # We perform one update step by evaluating the optimizer op (including it\n", + " # in the list of returned values for session.run())\n", + " _, loss_val = session.run([train_op, loss], feed_dict=feed_dict)\n", + " average_loss += loss_val\n", + "\n", + " if step % 2000 == 0:\n", + " if step > 0:\n", + " average_loss /= 2000\n", + " # The average loss is an estimate of the loss over the last 2000 batches.\n", + " print('Average loss at step ', step, ': ', average_loss)\n", + " run.log(\"Loss\", average_loss)\n", + " average_loss = 0\n", + " final_embeddings = normalized_embeddings.eval()\n", + "\n", + " # Evaluate similarity in the end on worker 0.\n", + " if hvd.rank() == 0:\n", + " sim = similarity.eval()\n", + " for i in xrange(valid_size):\n", + " valid_word = reverse_dictionary[valid_examples[i]]\n", + " top_k = 8 # number of nearest neighbors\n", + " nearest = (-sim[i, :]).argsort()[1:top_k + 1]\n", + " log_str = 'Nearest to %s:' % valid_word\n", + " for k in xrange(top_k):\n", + " close_word = reverse_dictionary[nearest[k]]\n", + " log_str = '%s %s,' % (log_str, close_word)\n", + " print(log_str)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Upload http://mattmahoney.net/dc/text8.zip to Azure Blob storage."
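, + "\n", + "We upload the corpus once to the workspace's default datastore so that every node of the remote cluster reads the same file, rather than each worker downloading it from the original site. The datastore path is handed to the training script through the `--data_dir` script parameter further below, and the download helper in `word2vec.py` above simply skips the download when the file is already present. Condensed from `maybe_download()` above:\n", + "\n", + "```python\n", + "if not os.path.exists(filename):\n", + "    filename, _ = urllib.request.urlretrieve(url, filename)\n", + "else:\n", + "    print(\"Use the data from the input data_dir %s\" % data_dir)\n", + "```"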
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = ws.get_default_datastore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib.request\n", + "\n", + "os.makedirs('./data', exist_ok=True)\n", + "\n", + "urllib.request.urlretrieve('http://mattmahoney.net/dc/text8.zip', filename='./data/text8.zip')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds.upload(src_dir='data', target_path='data', overwrite=True, show_progress=True)\n", + "\n", + "path_on_datastore = \"data/text8.zip\"\n", + "ds_data = ds.path(path_on_datastore)\n", + "print(ds_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "script_params={\n", + " \"--data_dir\": ds_data\n", + "}\n", + "tf_estimator = TensorFlow(source_directory=project_folder,\n", + " compute_target=compute_target,\n", + " entry_script='word2vec.py',\n", + " script_params=script_params,\n", + " node_count=2,\n", + " process_count_per_node=1,\n", + " distributed_backend=\"mpi\",\n", + " use_gpu=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(tf_estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/training/51.distributed-tensorflow-with-parameter-server/51.distributed-tensorflow-with-parameter-server.ipynb b/training/51.distributed-tensorflow-with-parameter-server/51.distributed-tensorflow-with-parameter-server.ipynb new file mode 100644 index 000000000..55decdf03 --- /dev/null +++ b/training/51.distributed-tensorflow-with-parameter-server/51.distributed-tensorflow-with-parameter-server.ipynb @@ -0,0 +1,473 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 51. Distributed TensorFlow using Parameter Server\n", + "In this tutorial we demonstrate how to use the Azure ML Training SDK to train a TensorFlow model in a distributed manner using a parameter server.\n", + "\n", + "# Prerequisites\n", + "\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
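, + "\n", + "When the job is submitted with `distributed_backend=\"ps\"`, each launched process learns its role from the `TF_CONFIG` environment variable, which the training script defined below parses with `json.loads`. A minimal sketch of the value one worker might see (hostnames, ports, and indices are assigned by the service at run time, so the values here are illustrative only):\n", + "\n", + "```python\n", + "import json\n", + "import os\n", + "\n", + "# Illustrative TF_CONFIG for one parameter server and two workers.\n", + "os.environ[\"TF_CONFIG\"] = json.dumps({\n", + "    \"cluster\": {\"ps\": [\"host0:2222\"], \"worker\": [\"host1:2222\", \"host2:2222\"]},\n", + "    \"task\": {\"type\": \"worker\", \"index\": 0}\n", + "})\n", + "```"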
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "from azureml.core.experiment import Experiment\n", + "\n", + "username = getpass.getuser().replace('-','')\n", + "\n", + "# choose a name for the run history container in the workspace\n", + "run_history_name = username + '-tf_ps'\n", + "\n", + "experiment = Experiment(ws, run_history_name)\n", + "\n", + "# project folder name\n", + "project_folder = './' + run_history_name\n", + "\n", + "print(project_folder)\n", + "os.makedirs(project_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This recipe uses an MLC-managed Batch AI cluster. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "batchai_cluster_name='gpucluster'\n", + "\n", + "\n", + "try:\n", + " # Check for existing cluster\n", + " compute_target = ComputeTarget(ws, batchai_cluster_name)\n", + " print('Found existing compute target')\n", + "except Exception:\n", + " # Else, create new one\n", + " print('Creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\", # NC6 is GPU-enabled\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = True,\n", + " cluster_min_nodes = 0, \n", + " cluster_max_nodes = 4)\n", + " compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + "\n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + "print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile {project_folder}/mnist_replica.py\n", + "\n", + "# Copyright 2016 The TensorFlow Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================\n", + "\"\"\"Distributed MNIST training and validation, with model replicas.\n", + "A simple softmax model with one hidden layer is defined. The parameters\n", + "(weights and biases) are located on one parameter server (ps), while the ops\n", + "are executed on two worker nodes by default. The TF sessions also run on the\n", + "worker nodes.\n", + "Multiple invocations of this script can be done in parallel, with different\n", + "values for --task_index. There should be exactly one invocation with\n", + "--task_index=0, which will create a master session that carries out variable\n", + "initialization. The other, non-master, sessions will wait for the master\n", + "session to finish the initialization before proceeding to the training stage.\n", + "The coordination between the multiple worker invocations occurs due to\n", + "the definition of the parameters on the same ps devices. The parameter updates\n", + "from one worker are visible to all other workers. As such, the workers can\n", + "perform forward computation and gradient calculation in parallel, which\n", + "should lead to increased training speed for the simple model.\n", + "\"\"\"\n", + "\n", + "from __future__ import absolute_import\n", + "from __future__ import division\n", + "from __future__ import print_function\n", + "\n", + "import os\n", + "import math\n", + "import sys\n", + "import tempfile\n", + "import time\n", + "import json\n", + "\n", + "import tensorflow as tf\n", + "from tensorflow.examples.tutorials.mnist import input_data\n", + "from azureml.core.run import Run\n", + "\n", + "flags = tf.app.flags\n", + "flags.DEFINE_string(\"data_dir\", \"/tmp/mnist-data\",\n", + " \"Directory for storing mnist data\")\n", + "flags.DEFINE_boolean(\"download_only\", False,\n", + " \"Only perform downloading of data; Do not proceed to \"\n", + " \"session preparation, model definition or training\")\n", + "flags.DEFINE_integer(\"num_gpus\", 0, \"Total number of gpus for each machine.\"\n", + " \"If you don't use GPU, please set it to '0'\")\n", + "flags.DEFINE_integer(\"replicas_to_aggregate\", None,\n", + " \"Number of replicas to aggregate before parameter update \"\n", + " \"is applied (For sync_replicas mode only; default: \"\n", + " \"num_workers)\")\n", + "flags.DEFINE_integer(\"hidden_units\", 100,\n", + " \"Number of units in the hidden layer of the NN\")\n", + "flags.DEFINE_integer(\"train_steps\", 200,\n", + " \"Number of (global) training steps to perform\")\n", + "flags.DEFINE_integer(\"batch_size\", 100, \"Training batch size\")\n", + "flags.DEFINE_float(\"learning_rate\", 0.01, \"Learning rate\")\n", + "flags.DEFINE_boolean(\n", + " \"sync_replicas\", False,\n", + " \"Use the sync_replicas (synchronized replicas) mode, \"\n", + " \"wherein the parameter updates from workers are aggregated 
\"\n", + " \"before applied to avoid stale gradients\")\n", + "flags.DEFINE_boolean(\n", + " \"existing_servers\", False, \"Whether servers already exists. If True, \"\n", + " \"will use the worker hosts via their GRPC URLs (one client process \"\n", + " \"per worker host). Otherwise, will create an in-process TensorFlow \"\n", + " \"server.\")\n", + "\n", + "FLAGS = flags.FLAGS\n", + "\n", + "IMAGE_PIXELS = 28\n", + "\n", + "\n", + "def main(unused_argv):\n", + " data_root = os.path.join(\"outputs\", \"MNIST\")\n", + " mnist = None\n", + " tf_config = os.environ.get(\"TF_CONFIG\")\n", + " if not tf_config or tf_config == \"\":\n", + " raise ValueError(\"TF_CONFIG not found.\")\n", + " tf_config_json = json.loads(tf_config)\n", + " cluster = tf_config_json.get('cluster')\n", + " job_name = tf_config_json.get('task', {}).get('type')\n", + " task_index = tf_config_json.get('task', {}).get('index')\n", + " job_name = \"worker\" if job_name == \"master\" else job_name\n", + " sentinel_path = os.path.join(data_root, \"complete.txt\") \n", + " if job_name==\"worker\" and task_index==0:\n", + " mnist = input_data.read_data_sets(data_root, one_hot=True)\n", + " path = os.path.join(data_root, \"complete.txt\") \n", + " with open(sentinel_path, 'w+') as f:\n", + " f.write(\"download complete\")\n", + " else:\n", + " while not os.path.exists(sentinel_path):\n", + " time.sleep(0.01)\n", + " mnist = input_data.read_data_sets(data_root, one_hot=True)\n", + " \n", + " if FLAGS.download_only:\n", + " sys.exit(0)\n", + "\n", + " print(\"job name = %s\" % job_name)\n", + " print(\"task index = %d\" % task_index)\n", + " print(\"number of GPUs = %d\" % FLAGS.num_gpus)\n", + "\n", + " #Construct the cluster and start the server\n", + " cluster_spec = tf.train.ClusterSpec(cluster)\n", + " \n", + " # Get the number of workers.\n", + " num_workers = len(cluster_spec.task_indices(\"worker\"))\n", + "\n", + " if not FLAGS.existing_servers:\n", + " # Not using existing servers. Create an in-process server.\n", + " server = tf.train.Server(\n", + " cluster_spec, job_name=job_name, task_index=task_index)\n", + " if job_name == \"ps\":\n", + " server.join()\n", + "\n", + " is_chief = (task_index == 0)\n", + " if FLAGS.num_gpus > 0:\n", + " # Avoid gpu allocation conflict: now allocate task_num -> #gpu\n", + " # for each worker in the corresponding machine\n", + " gpu = (task_index % FLAGS.num_gpus)\n", + " worker_device = \"/job:worker/task:%d/gpu:%d\" % (task_index, gpu)\n", + " elif FLAGS.num_gpus == 0:\n", + " # Just allocate the CPU to worker server\n", + " cpu = 0\n", + " worker_device = \"/job:worker/task:%d/cpu:%d\" % (task_index, cpu)\n", + " # The device setter will automatically place Variables ops on separate\n", + " # parameter servers (ps). 
The non-Variable ops will be placed on the workers.\n", + " # The ps use CPU and workers use corresponding GPU\n", + " with tf.device(\n", + " tf.train.replica_device_setter(\n", + " worker_device=worker_device,\n", + " ps_device=\"/job:ps/cpu:0\",\n", + " cluster=cluster)):\n", + " global_step = tf.Variable(0, name=\"global_step\", trainable=False)\n", + "\n", + " # Variables of the hidden layer\n", + " hid_w = tf.Variable(\n", + " tf.truncated_normal(\n", + " [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],\n", + " stddev=1.0 / IMAGE_PIXELS),\n", + " name=\"hid_w\")\n", + " hid_b = tf.Variable(tf.zeros([FLAGS.hidden_units]), name=\"hid_b\")\n", + "\n", + " # Variables of the softmax layer\n", + " sm_w = tf.Variable(\n", + " tf.truncated_normal(\n", + " [FLAGS.hidden_units, 10],\n", + " stddev=1.0 / math.sqrt(FLAGS.hidden_units)),\n", + " name=\"sm_w\")\n", + " sm_b = tf.Variable(tf.zeros([10]), name=\"sm_b\")\n", + "\n", + " # Ops: located on the worker specified with task_index\n", + " x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS])\n", + " y_ = tf.placeholder(tf.float32, [None, 10])\n", + "\n", + " hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b)\n", + " hid = tf.nn.relu(hid_lin)\n", + "\n", + " y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))\n", + " cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))\n", + "\n", + " opt = tf.train.AdamOptimizer(FLAGS.learning_rate)\n", + "\n", + " if FLAGS.sync_replicas:\n", + " if FLAGS.replicas_to_aggregate is None:\n", + " replicas_to_aggregate = num_workers\n", + " else:\n", + " replicas_to_aggregate = FLAGS.replicas_to_aggregate\n", + "\n", + " opt = tf.train.SyncReplicasOptimizer(\n", + " opt,\n", + " replicas_to_aggregate=replicas_to_aggregate,\n", + " total_num_replicas=num_workers,\n", + " name=\"mnist_sync_replicas\")\n", + "\n", + " train_step = opt.minimize(cross_entropy, global_step=global_step)\n", + "\n", + " if FLAGS.sync_replicas:\n", + " local_init_op = opt.local_step_init_op\n", + " if is_chief:\n", + " local_init_op = opt.chief_init_op\n", + "\n", + " ready_for_local_init_op = opt.ready_for_local_init_op\n", + "\n", + " # Initial token and chief queue runners required by the sync_replicas mode\n", + " chief_queue_runner = opt.get_chief_queue_runner()\n", + " sync_init_op = opt.get_init_tokens_op()\n", + "\n", + " init_op = tf.global_variables_initializer()\n", + " train_dir = tempfile.mkdtemp()\n", + "\n", + " if FLAGS.sync_replicas:\n", + " sv = tf.train.Supervisor(\n", + " is_chief=is_chief,\n", + " logdir=train_dir,\n", + " init_op=init_op,\n", + " local_init_op=local_init_op,\n", + " ready_for_local_init_op=ready_for_local_init_op,\n", + " recovery_wait_secs=1,\n", + " global_step=global_step)\n", + " else:\n", + " sv = tf.train.Supervisor(\n", + " is_chief=is_chief,\n", + " logdir=train_dir,\n", + " init_op=init_op,\n", + " recovery_wait_secs=1,\n", + " global_step=global_step)\n", + "\n", + " sess_config = tf.ConfigProto(\n", + " allow_soft_placement=True,\n", + " log_device_placement=False,\n", + " device_filters=[\"/job:ps\",\n", + " \"/job:worker/task:%d\" % task_index])\n", + "\n", + " # The chief worker (task_index==0) session will prepare the session,\n", + " # while the remaining workers will wait for the preparation to complete.\n", + " if is_chief:\n", + " print(\"Worker %d: Initializing session...\" % task_index)\n", + " else:\n", + " print(\"Worker %d: Waiting for session to be initialized...\" %\n", + " task_index)\n", + "\n", + " if FLAGS.existing_servers:\n", + " 
# the worker host list comes from the TF_CONFIG cluster spec parsed above\n", + "      server_grpc_url = \"grpc://\" + cluster[\"worker\"][task_index]\n", + "      print(\"Using existing server at: %s\" % server_grpc_url)\n", + "\n", + " sess = sv.prepare_or_wait_for_session(server_grpc_url, config=sess_config)\n", + " else:\n", + " sess = sv.prepare_or_wait_for_session(server.target, config=sess_config)\n", + "\n", + " print(\"Worker %d: Session initialization complete.\" % task_index)\n", + "\n", + " if FLAGS.sync_replicas and is_chief:\n", + " # Chief worker will start the chief queue runner and call the init op.\n", + " sess.run(sync_init_op)\n", + " sv.start_queue_runners(sess, [chief_queue_runner])\n", + "\n", + " # Perform training\n", + " time_begin = time.time()\n", + " print(\"Training begins @ %f\" % time_begin)\n", + "\n", + " local_step = 0\n", + " while True:\n", + " # Training feed\n", + " batch_xs, batch_ys = mnist.train.next_batch(FLAGS.batch_size)\n", + " train_feed = {x: batch_xs, y_: batch_ys}\n", + "\n", + " _, step = sess.run([train_step, global_step], feed_dict=train_feed)\n", + " local_step += 1\n", + "\n", + " now = time.time()\n", + " print(\"%f: Worker %d: training step %d done (global step: %d)\" %\n", + " (now, task_index, local_step, step))\n", + "\n", + " if step >= FLAGS.train_steps:\n", + " break\n", + "\n", + " time_end = time.time()\n", + " print(\"Training ends @ %f\" % time_end)\n", + " training_time = time_end - time_begin\n", + " print(\"Training elapsed time: %f s\" % training_time)\n", + "\n", + " # Validation feed\n", + " val_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", + " val_xent = sess.run(cross_entropy, feed_dict=val_feed)\n", + " print(\"After %d training step(s), validation cross entropy = %g\" %\n", + " (FLAGS.train_steps, val_xent))\n", + " if job_name==\"worker\" and task_index==0:\n", + " run = Run.get_submitted_run()\n", + " run.log(\"CrossEntropy\", val_xent)\n", + "\n", + "if __name__ == \"__main__\":\n", + " tf.app.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "tf_estimator = TensorFlow(source_directory=project_folder,\n", + " compute_target=compute_target,\n", + " entry_script='mnist_replica.py',\n", + " node_count=2,\n", + " worker_count=2,\n", + " parameter_server_count=1, \n", + " distributed_backend=\"ps\",\n", + " use_gpu=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(tf_estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/training/52.distributed-cntk/52.distributed-cntk.ipynb b/training/52.distributed-cntk/52.distributed-cntk.ipynb new file mode 100644 index 000000000..38c566875 --- /dev/null +++ 
b/training/52.distributed-cntk/52.distributed-cntk.ipynb @@ -0,0 +1,509 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 52. Distributed CNTK\n", + "In this tutorial we demonstrate how to use the Azure ML Training SDK to train a CNTK model in a distributed manner.\n", + "\n", + "# Prerequisites\n", + "\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "from azureml.core.experiment import Experiment\n", + "\n", + "username = getpass.getuser().replace('-','')\n", + "\n", + "# choose a name for the run history container in the workspace\n", + "run_history_name = username + '-cntk-distrib'\n", + "\n", + "experiment = Experiment(ws, run_history_name)\n", + "\n", + "# project folder name\n", + "project_folder = './' + run_history_name\n", + "\n", + "print(project_folder)\n", + "os.makedirs(project_folder, exist_ok = True)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This recipe uses an MLC-managed Batch AI cluster. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "batchai_cluster_name='gpucluster'\n", + "\n", + "\n", + "try:\n", + " # Check for existing cluster\n", + " compute_target = ComputeTarget(ws, batchai_cluster_name)\n", + " print('Found existing compute target')\n", + "except Exception:\n", + " # Else, create new one\n", + " print('Creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\", # NC6 is GPU-enabled\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = True,\n", + " cluster_min_nodes = 0, \n", + " cluster_max_nodes = 4)\n", + " compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n", + " # can poll for a minimum number of nodes and for a specific timeout. 
\n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + "\n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + "print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile {project_folder}/cntk_mnist.py\n", + "\n", + "# This code is adapted from CNTK MNIST tutorials: \n", + "# 1. https://github.com/Microsoft/CNTK/blob/v2.0/Tutorials/CNTK_103A_MNIST_DataLoader.ipynb\n", + "# 2. https://github.com/Microsoft/CNTK/blob/v2.0/Tutorials/CNTK_103C_MNIST_MultiLayerPerceptron.ipynb\n", + "\n", + "# Import the relevant modules to be used later\n", + "from __future__ import print_function\n", + "import gzip\n", + "import numpy as np\n", + "import os\n", + "import shutil\n", + "import struct\n", + "import sys\n", + "import time\n", + "import pandas \n", + "\n", + "import cntk as C\n", + "from azureml.core.run import Run\n", + "import argparse\n", + "\n", + "run = Run.get_submitted_run()\n", + "\n", + "parser=argparse.ArgumentParser()\n", + "\n", + "parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')\n", + "parser.add_argument('--num_hidden_layers', type=int, default=2, help='number of hidden layers')\n", + "parser.add_argument('--minibatch_size', type=int, default=64, help='minibatchsize')\n", + "\n", + "args=parser.parse_args() \n", + "\n", + "# Functions to load MNIST images and unpack into train and test set.\n", + "# - loadData reads image data and formats into a 28x28 long array\n", + "# - loadLabels reads the corresponding labels data, 1 for each image\n", + "# - load packs the downloaded image and labels data into a combined format to be read later by \n", + "# CNTK text reader \n", + "def loadData(src, cimg):\n", + " print ('Downloading ' + src)\n", + " gzfname, h = urlretrieve(src, './delete.me')\n", + " print ('Done.')\n", + " try:\n", + " with gzip.open(gzfname) as gz:\n", + " n = struct.unpack('I', gz.read(4))\n", + " # Read magic number.\n", + " if n[0] != 0x3080000:\n", + " raise Exception('Invalid file: unexpected magic number.')\n", + " # Read number of entries.\n", + " n = struct.unpack('>I', gz.read(4))[0]\n", + " if n != cimg:\n", + " raise Exception('Invalid file: expected {0} entries.'.format(cimg))\n", + " crow = struct.unpack('>I', gz.read(4))[0]\n", + " ccol = struct.unpack('>I', gz.read(4))[0]\n", + " if crow != 28 or ccol != 28:\n", + " raise Exception('Invalid file: expected 28 rows/cols per image.')\n", + " # Read data.\n", + " res = np.fromstring(gz.read(cimg * crow * ccol), dtype = np.uint8)\n", + " finally:\n", + " os.remove(gzfname)\n", + " return res.reshape((cimg, crow * ccol))\n", + "\n", + "def loadLabels(src, cimg):\n", + " print ('Downloading ' + src)\n", + " gzfname, h = urlretrieve(src, './delete.me')\n", + " print ('Done.')\n", + " try:\n", + " with gzip.open(gzfname) as gz:\n", + " n = struct.unpack('I', gz.read(4))\n", + " # Read magic number.\n", + " if n[0] != 0x1080000:\n", + " raise Exception('Invalid file: unexpected magic number.')\n", + " # Read number of entries.\n", + " n = struct.unpack('>I', gz.read(4))\n", + " if n[0] != cimg:\n", + " raise Exception('Invalid file: expected {0} rows.'.format(cimg))\n", + " # Read labels.\n", + " res = np.fromstring(gz.read(cimg), dtype = np.uint8)\n", + " finally:\n", + " 
os.remove(gzfname)\n", + " return res.reshape((cimg, 1))\n", + "\n", + "def try_download(dataSrc, labelsSrc, cimg):\n", + " data = loadData(dataSrc, cimg)\n", + " labels = loadLabels(labelsSrc, cimg)\n", + " return np.hstack((data, labels))\n", + "\n", + "# Save the data files into a format compatible with CNTK text reader\n", + "def savetxt(filename, ndarray):\n", + " dir = os.path.dirname(filename)\n", + "\n", + " if not os.path.exists(dir):\n", + " os.makedirs(dir)\n", + "\n", + " if not os.path.isfile(filename):\n", + " print(\"Saving\", filename )\n", + " with open(filename, 'w') as f:\n", + " labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))\n", + " for row in ndarray:\n", + " row_str = row.astype(str)\n", + " label_str = labels[row[-1]]\n", + " feature_str = ' '.join(row_str[:-1])\n", + " f.write('|labels {} |features {}\\n'.format(label_str, feature_str))\n", + " else:\n", + " print(\"File already exists\", filename)\n", + "\n", + "# Read a CTF formatted text (as mentioned above) using the CTF deserializer from a file\n", + "def create_reader(path, is_training, input_dim, num_label_classes):\n", + " return C.io.MinibatchSource(C.io.CTFDeserializer(path, C.io.StreamDefs(\n", + " labels = C.io.StreamDef(field='labels', shape=num_label_classes, is_sparse=False),\n", + " features = C.io.StreamDef(field='features', shape=input_dim, is_sparse=False)\n", + " )), randomize = is_training, max_sweeps = C.io.INFINITELY_REPEAT if is_training else 1)\n", + "\n", + "# Defines a utility that prints the training progress\n", + "def print_training_progress(trainer, mb, frequency, verbose=1):\n", + " training_loss = \"NA\"\n", + " eval_error = \"NA\"\n", + "\n", + " if mb%frequency == 0:\n", + " training_loss = trainer.previous_minibatch_loss_average\n", + " eval_error = trainer.previous_minibatch_evaluation_average\n", + " if verbose: \n", + " print (\"Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%\".format(mb, training_loss, eval_error*100))\n", + " \n", + " return mb, training_loss, eval_error\n", + "\n", + "# Create the network architecture\n", + "def create_model(features):\n", + " with C.layers.default_options(init = C.layers.glorot_uniform(), activation = C.ops.relu):\n", + " h = features\n", + " for _ in range(num_hidden_layers):\n", + " h = C.layers.Dense(hidden_layers_dim)(h)\n", + " r = C.layers.Dense(num_output_classes, activation = None)(h)\n", + " return r\n", + "\n", + "\n", + "if __name__ == '__main__':\n", + " run = Run.get_submitted_run()\n", + "\n", + " try: \n", + " from urllib.request import urlretrieve \n", + " except ImportError: \n", + " from urllib import urlretrieve\n", + "\n", + " # Select the right target device when this script is being used:\n", + " if 'TEST_DEVICE' in os.environ:\n", + " if os.environ['TEST_DEVICE'] == 'cpu':\n", + " C.device.try_set_default_device(C.device.cpu())\n", + " else:\n", + " C.device.try_set_default_device(C.device.gpu(0))\n", + "\n", + " # URLs for the train image and labels data\n", + " url_train_image = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'\n", + " url_train_labels = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'\n", + " num_train_samples = 60000\n", + "\n", + " print(\"Downloading train data\")\n", + " train = try_download(url_train_image, url_train_labels, num_train_samples)\n", + "\n", + " url_test_image = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'\n", + " url_test_labels = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'\n", + " num_test_samples = 
10000\n", + "\n", + " print(\"Downloading test data\")\n", + " test = try_download(url_test_image, url_test_labels, num_test_samples)\n", + "\n", + "\n", + " # Save the train and test files (prefer our default path for the data)\n", + " rank = os.environ.get(\"OMPI_COMM_WORLD_RANK\") \n", + " data_dir = os.path.join(\"outputs\", \"MNIST\")\n", + " sentinel_path = os.path.join(data_dir, \"complete.txt\") \n", + " if rank == '0': \n", + " print ('Writing train text file...')\n", + " savetxt(os.path.join(data_dir, \"Train-28x28_cntk_text.txt\"), train)\n", + "\n", + " print ('Writing test text file...')\n", + " savetxt(os.path.join(data_dir, \"Test-28x28_cntk_text.txt\"), test)\n", + " with open(sentinel_path, 'w+') as f:\n", + " f.write(\"download complete\")\n", + "\n", + " print('Done with downloading data.')\n", + " else:\n", + " while not os.path.exists(sentinel_path):\n", + " time.sleep(0.01)\n", + " \n", + "\n", + " # Ensure we always get the same amount of randomness\n", + " np.random.seed(0)\n", + "\n", + " # Define the data dimensions\n", + " input_dim = 784\n", + " num_output_classes = 10\n", + "\n", + " # Ensure the training and test data is generated and available for this tutorial.\n", + " # We search in a few locations for the cached MNIST data set.\n", + " data_found = False\n", + " for data_dir in [os.path.join(\"..\", \"Examples\", \"Image\", \"DataSets\", \"MNIST\"),\n", + " os.path.join(\"data_\" + str(rank), \"MNIST\"),\n", + " os.path.join(\"outputs\", \"MNIST\")]:\n", + " train_file = os.path.join(data_dir, \"Train-28x28_cntk_text.txt\")\n", + " test_file = os.path.join(data_dir, \"Test-28x28_cntk_text.txt\")\n", + " if os.path.isfile(train_file) and os.path.isfile(test_file):\n", + " data_found = True\n", + " break\n", + " if not data_found:\n", + " raise ValueError(\"Please generate the data by completing CNTK 103 Part A\")\n", + " print(\"Data directory is {0}\".format(data_dir))\n", + "\n", + " num_hidden_layers = args.num_hidden_layers\n", + " hidden_layers_dim = 400\n", + "\n", + " input = C.input_variable(input_dim)\n", + " label = C.input_variable(num_output_classes)\n", + "\n", + " \n", + " # Create the model, scaling the input to the 0-1 range by dividing each pixel by 255.\n", + " z = create_model(input/255.0)\n", + "\n", + " loss = C.cross_entropy_with_softmax(z, label)\n", + " label_error = C.classification_error(z, label)\n", + "\n", + "\n", + " # Instantiate the trainer object to drive the model training\n", + " learning_rate = args.learning_rate\n", + " lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)\n", + " learner = C.sgd(z.parameters, lr_schedule)\n", + " trainer = C.Trainer(z, (loss, label_error), [learner])\n", + "\n", + "\n", + " # Initialize the parameters for the trainer\n", + " minibatch_size = args.minibatch_size\n", + " num_samples_per_sweep = 60000\n", + " num_sweeps_to_train_with = 10\n", + " num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size\n", + "\n", + " # Create the reader for the training data set\n", + " reader_train = create_reader(train_file, True, input_dim, num_output_classes)\n", + "\n", + " # Map the data streams to the input and labels.\n", + " input_map = {\n", + " label : reader_train.streams.labels,\n", + " input : reader_train.streams.features\n", + " } \n", + "\n", + " # Run the trainer and perform model training\n", + " training_progress_output_freq = 500\n", + " \n", + " errors = []\n", + " losses = []\n", + " for i in range(0, 
int(num_minibatches_to_train)): \n", + " # Read a mini batch from the training data file\n", + " data = reader_train.next_minibatch(minibatch_size, input_map = input_map)\n", + " \n", + " trainer.train_minibatch(data)\n", + " batchsize, loss, error = print_training_progress(trainer, i, training_progress_output_freq, verbose=1)\n", + " if (error != 'NA') and (loss != 'NA'):\n", + " errors.append(float(error))\n", + " losses.append(float(loss))\n", + " \n", + " # log the losses\n", + " if rank == '0': \n", + " run.log_list(\"Loss\", losses)\n", + " run.log_list(\"Error\", errors)\n", + "\n", + " # Create the reader for the test data set\n", + " reader_test = create_reader(test_file, False, input_dim, num_output_classes)\n", + "\n", + " test_input_map = {\n", + " label : reader_test.streams.labels,\n", + " input : reader_test.streams.features,\n", + " }\n", + "\n", + " # Test data for trained model\n", + " test_minibatch_size = 512\n", + " num_samples = 10000\n", + " num_minibatches_to_test = num_samples // test_minibatch_size\n", + " test_result = 0.0\n", + "\n", + " \n", + " for i in range(num_minibatches_to_test): \n", + " # We are loading test data in batches specified by test_minibatch_size\n", + " # Each data point in the minibatch is a MNIST digit image of 784 dimensions \n", + " # with one pixel per dimension that we will encode / decode with the \n", + " # trained model.\n", + " data = reader_test.next_minibatch(test_minibatch_size,\n", + " input_map = test_input_map)\n", + "\n", + " eval_error = trainer.test_minibatch(data)\n", + " test_result = test_result + eval_error\n", + " \n", + "\n", + " # Average of evaluation errors of all test minibatches\n", + " print(\"Average test error: {0:.2f}%\".format(test_result*100 / num_minibatches_to_test))\n", + "\n", + " out = C.softmax(z)\n", + "\n", + " # Read a small batch of test data to inspect individual predictions;\n", + " # reader_test continues from where the evaluation loop above stopped.\n", + " eval_minibatch_size = 25\n", + " data = reader_test.next_minibatch(eval_minibatch_size, input_map = test_input_map)\n", + "\n", + " img_label = data[label].asarray()\n", + " img_data = data[input].asarray()\n", + " predicted_label_prob = [out.eval(img_data[i]) for i in range(len(img_data))]\n", + "\n", + " # Find the index with the maximum value for both predicted as well as the ground truth\n", + " pred = [np.argmax(predicted_label_prob[i]) for i in range(len(predicted_label_prob))]\n", + " gtlabel = [np.argmax(img_label[i]) for i in range(len(img_label))]\n", + "\n", + " print(\"Label :\", gtlabel[:25])\n", + " print(\"Predicted:\", pred)\n", + " \n", + " # save model to outputs folder\n", + " z.save('outputs/cntk.model')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.estimator import Estimator\n", + "pip_packages=['cntk==2.5.1', 'pandas==0.23.4']\n", + "cntk_estimator = Estimator(source_directory=project_folder,\n", + " compute_target=compute_target,\n", + " entry_script='cntk_mnist.py',\n", + " node_count=2,\n", + " process_count_per_node=1,\n", + " distributed_backend=\"mpi\", \n", + " pip_packages=pip_packages,\n", + " custom_docker_base_image=\"microsoft/mmlspark:0.12\",\n", + " use_gpu=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = experiment.submit(cntk_estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/training/53.distributed-pytorch-with-horovod/53.distributed-pytorch-with-horovod.ipynb b/training/53.distributed-pytorch-with-horovod/53.distributed-pytorch-with-horovod.ipynb new file mode 100644 index 000000000..46db6dab0 --- /dev/null +++ b/training/53.distributed-pytorch-with-horovod/53.distributed-pytorch-with-horovod.ipynb @@ -0,0 +1,376 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PyTorch Distributed Demo\n", + "\n", + "In this demo, we will run a sample PyTorch job using Horovod on a multi-node Batch AI cluster." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set experiment name and create project\n", + "Choose a name for your run history container in the workspace, and create a folder for the project." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "experiment_name = 'pytorch-dist-hvd'\n", + "\n", + "# project folder\n", + "project_folder = './sample_projects/pytorch-dist-hvd'\n", + "os.makedirs(project_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Write demo PyTorch code\n", + "\n", + "We will use a distributed PyTorch implementation of the classic MNIST problem. The following cell writes the main implementation to the project folder." 
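, + "\n", + "The script follows the standard Horovod recipe. Condensed to just the distributed pieces (all taken from the full script below, so this fragment is not meant to run on its own):\n", + "\n", + "```python\n", + "import horovod.torch as hvd\n", + "\n", + "hvd.init()  # one rank per MPI-launched process\n", + "hvd.broadcast_parameters(model.state_dict(), root_rank=0)  # same initial weights on every rank\n", + "train_sampler = torch.utils.data.distributed.DistributedSampler(\n", + "    train_dataset, num_replicas=hvd.size(), rank=hvd.rank())  # shard the data across ranks\n", + "optimizer = optim.SGD(model.parameters(), lr=args.lr * hvd.size(),  # scale lr by world size\n", + "                      momentum=args.momentum)\n", + "optimizer = hvd.DistributedOptimizer(\n", + "    optimizer, named_parameters=model.named_parameters())  # average gradients across ranks\n", + "```"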
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile {project_folder}/pytorch_horovod_mnist.py\n", + "\n", + "from __future__ import print_function\n", + "import argparse\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torchvision import datasets, transforms\n", + "from torch.autograd import Variable\n", + "import torch.utils.data.distributed\n", + "import horovod.torch as hvd\n", + "\n", + "# Training settings\n", + "parser = argparse.ArgumentParser(description='PyTorch MNIST Example')\n", + "parser.add_argument('--batch-size', type=int, default=64, metavar='N',\n", + " help='input batch size for training (default: 64)')\n", + "parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',\n", + " help='input batch size for testing (default: 1000)')\n", + "parser.add_argument('--epochs', type=int, default=10, metavar='N',\n", + " help='number of epochs to train (default: 10)')\n", + "parser.add_argument('--lr', type=float, default=0.01, metavar='LR',\n", + " help='learning rate (default: 0.01)')\n", + "parser.add_argument('--momentum', type=float, default=0.5, metavar='M',\n", + " help='SGD momentum (default: 0.5)')\n", + "parser.add_argument('--no-cuda', action='store_true', default=False,\n", + " help='disables CUDA training')\n", + "parser.add_argument('--seed', type=int, default=42, metavar='S',\n", + " help='random seed (default: 42)')\n", + "parser.add_argument('--log-interval', type=int, default=10, metavar='N',\n", + " help='how many batches to wait before logging training status')\n", + "args = parser.parse_args()\n", + "args.cuda = not args.no_cuda and torch.cuda.is_available()\n", + "\n", + "hvd.init()\n", + "torch.manual_seed(args.seed)\n", + "\n", + "if args.cuda:\n", + " # Horovod: pin GPU to local rank.\n", + " torch.cuda.set_device(hvd.local_rank())\n", + " torch.cuda.manual_seed(args.seed)\n", + "\n", + "\n", + "kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}\n", + "train_dataset = \\\n", + " datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,\n", + " transform=transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.1307,), (0.3081,))\n", + " ]))\n", + "train_sampler = torch.utils.data.distributed.DistributedSampler(\n", + " train_dataset, num_replicas=hvd.size(), rank=hvd.rank())\n", + "train_loader = torch.utils.data.DataLoader(\n", + " train_dataset, batch_size=args.batch_size, sampler=train_sampler, **kwargs)\n", + "\n", + "test_dataset = \\\n", + " datasets.MNIST('data-%d' % hvd.rank(), train=False, transform=transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.Normalize((0.1307,), (0.3081,))\n", + " ]))\n", + "test_sampler = torch.utils.data.distributed.DistributedSampler(\n", + " test_dataset, num_replicas=hvd.size(), rank=hvd.rank())\n", + "test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size,\n", + " sampler=test_sampler, **kwargs)\n", + "\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.conv1 = nn.Conv2d(1, 10, kernel_size=5)\n", + " self.conv2 = nn.Conv2d(10, 20, kernel_size=5)\n", + " self.conv2_drop = nn.Dropout2d()\n", + " self.fc1 = nn.Linear(320, 50)\n", + " self.fc2 = nn.Linear(50, 10)\n", + "\n", + " def forward(self, x):\n", + " x = F.relu(F.max_pool2d(self.conv1(x), 2))\n", + " x = 
F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))\n", + " x = x.view(-1, 320)\n", + " x = F.relu(self.fc1(x))\n", + " x = F.dropout(x, training=self.training)\n", + " x = self.fc2(x)\n", + " return F.log_softmax(x)\n", + "\n", + "\n", + "model = Net()\n", + "\n", + "if args.cuda:\n", + " # Move model to GPU.\n", + " model.cuda()\n", + "\n", + "# Horovod: broadcast parameters.\n", + "hvd.broadcast_parameters(model.state_dict(), root_rank=0)\n", + "\n", + "# Horovod: scale learning rate by the number of GPUs.\n", + "optimizer = optim.SGD(model.parameters(), lr=args.lr * hvd.size(),\n", + " momentum=args.momentum)\n", + "\n", + "# Horovod: wrap optimizer with DistributedOptimizer.\n", + "optimizer = hvd.DistributedOptimizer(\n", + " optimizer, named_parameters=model.named_parameters())\n", + "\n", + "\n", + "def train(epoch):\n", + " model.train()\n", + " train_sampler.set_epoch(epoch)\n", + " for batch_idx, (data, target) in enumerate(train_loader):\n", + " if args.cuda:\n", + " data, target = data.cuda(), target.cuda()\n", + " data, target = Variable(data), Variable(target)\n", + " optimizer.zero_grad()\n", + " output = model(data)\n", + " loss = F.nll_loss(output, target)\n", + " loss.backward()\n", + " optimizer.step()\n", + " if batch_idx % args.log_interval == 0:\n", + " print('Train Epoch: {} [{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n", + " epoch, batch_idx * len(data), len(train_sampler),\n", + " 100. * batch_idx / len(train_loader), loss.data[0]))\n", + "\n", + "\n", + "def metric_average(val, name):\n", + " tensor = torch.FloatTensor([val])\n", + " avg_tensor = hvd.allreduce(tensor, name=name)\n", + " return avg_tensor[0]\n", + "\n", + "\n", + "def test():\n", + " model.eval()\n", + " test_loss = 0.\n", + " test_accuracy = 0.\n", + " for data, target in test_loader:\n", + " if args.cuda:\n", + " data, target = data.cuda(), target.cuda()\n", + " data, target = Variable(data, volatile=True), Variable(target)\n", + " output = model(data)\n", + " # sum up batch loss\n", + " test_loss += F.nll_loss(output, target, size_average=False).data[0]\n", + " # get the index of the max log-probability\n", + " pred = output.data.max(1, keepdim=True)[1]\n", + " test_accuracy += pred.eq(target.data.view_as(pred)).cpu().float().sum()\n", + "\n", + " test_loss /= len(test_sampler)\n", + " test_accuracy /= len(test_sampler)\n", + "\n", + " test_loss = metric_average(test_loss, 'avg_loss')\n", + " test_accuracy = metric_average(test_accuracy, 'avg_accuracy')\n", + "\n", + " if hvd.rank() == 0:\n", + " print('\\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\\n'.format(\n", + " test_loss, 100. * test_accuracy))\n", + "\n", + "\n", + "for epoch in range(1, args.epochs + 1):\n", + " train(epoch)\n", + " test()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deploy Batch AI cluster\n", + "\n", + "To run this in a distributed context, we'll need a Batch AI cluster with at least two nodes.\n", + "\n", + "The following cell provisions an autoscaling cluster of `STANDARD_NC6` VMs; the job itself is submitted later with `node_count=2` and `use_gpu=False`, so it runs on two nodes and uses only their CPUs. If you want to try a different node count or VM SKU, just change the relevant values in the code below."
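, + "\n", + "With the settings used later in this notebook (`node_count=2`, `process_count_per_node=1`, `distributed_backend=\"mpi\"`), MPI launches one training process per node, so inside the script `hvd.size()` is 2 and `hvd.rank()` is 0 or 1. A minimal sketch of what each launched process would report (a standalone illustration of the Horovod API, not part of the training script):\n", + "\n", + "```python\n", + "# Sketch only: when launched under MPI/Horovod, each rank prints its own identity.\n", + "import horovod.torch as hvd\n", + "\n", + "hvd.init()\n", + "print(\"rank %d of %d (local rank %d)\" % (hvd.rank(), hvd.size(), hvd.local_rank()))\n", + "```"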
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import BatchAiCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "batchai_cluster_name='gpucluster'\n", + "\n", + "\n", + "try:\n", + " # Check for existing cluster\n", + " compute_target = ComputeTarget(ws, batchai_cluster_name)\n", + " print('Found existing compute target')\n", + "except Exception:\n", + " # Else, create new one\n", + " print('Creating a new compute target...')\n", + " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\", # NC6 is GPU-enabled\n", + " #vm_priority = 'lowpriority', # optional\n", + " autoscale_enabled = True,\n", + " cluster_min_nodes = 0, \n", + " cluster_max_nodes = 4)\n", + " compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it will use the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + "\n", + " # For a more detailed view of current BatchAI cluster status, use the 'status' property \n", + "print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit job\n", + "\n", + "Now that we have a cluster ready to go, let's submit our job.\n", + "\n", + "We use the `PyTorch` estimator, which prepares an image with the `pytorch`, `horovod` and `torchvision` packages for us." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import PyTorch\n", + "\n", + "estimator = PyTorch(source_directory=project_folder,\n", + " compute_target=compute_target,\n", + " entry_script='pytorch_horovod_mnist.py',\n", + " node_count=2,\n", + " process_count_per_node=1,\n", + " distributed_backend=\"mpi\",\n", + " use_gpu=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.experiment import Experiment\n", + "\n", + "experiment = Experiment(workspace=ws, name=experiment_name)\n", + "run = experiment.submit(estimator)\n", + "print(run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(run).show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/01.train-models.ipynb b/tutorials/01.train-models.ipynb index 5c5688f03..38205bdfe 100644 --- a/tutorials/01.train-models.ipynb +++ b/tutorials/01.train-models.ipynb @@ -110,7 +110,7 @@ "experiment_name = 'sklearn-mnist'\n", "\n", "from azureml.core import Experiment\n", - "exp = Experiment(workspace = ws, name = experiment_name)" + "exp = Experiment(workspace=ws, name=experiment_name)" ] }, { @@ -143,25 +143,25 @@ "\n", "try:\n", " # look for the existing cluster by name\n", - " compute_target = ComputeTarget(workspace = ws, name = 
batchai_cluster_name)\n", + " compute_target = ComputeTarget(workspace=ws, name=batchai_cluster_name)\n", " if compute_target is BatchAiCompute:\n", " print('found compute target {}, just use it.'.format(batchai_cluster_name))\n", " else:\n", " print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(batchai_cluster_name))\n", "except ComputeTargetException:\n", " print('creating a new compute target...')\n", - " compute_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # small CPU-based VM\n", - " #vm_priority = 'lowpriority', # optional\n", - " autoscale_enabled = True,\n", - " cluster_min_nodes = 0, \n", - " cluster_max_nodes = 4)\n", + " compute_config = BatchAiCompute.provisioning_configuration(vm_size=\"STANDARD_D2_V2\", # small CPU-based VM\n", + " #vm_priority='lowpriority', # optional\n", + " autoscale_enabled=True,\n", + " cluster_min_nodes=0, \n", + " cluster_max_nodes=4)\n", "\n", " # create the cluster\n", " compute_target = ComputeTarget.create(ws, batchai_cluster_name, compute_config)\n", " \n", " # can poll for a minimum number of nodes and for a specific timeout. \n", " # if no min node count is provided it uses the scale settings for the cluster\n", - " compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", " \n", " # Use the 'status' property to get a detailed status for the current cluster. \n", " print(compute_target.status.serialize())" @@ -197,10 +197,10 @@ "\n", "os.makedirs('./data', exist_ok = True)\n", "\n", - "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/train-images.gz')\n", - "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/train-labels.gz')\n", - "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/test-images.gz')\n", - "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/test-labels.gz')" + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename='./data/train-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename='./data/train-labels.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename='./data/test-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename='./data/test-labels.gz')" ] }, { @@ -237,8 +237,8 @@ " plt.subplot(1, sample_size, count)\n", " plt.axhline('')\n", " plt.axvline('')\n", - " plt.text(x = 10, y = -10, s = y_train[i], fontsize = 18)\n", - " plt.imshow(X_train[i].reshape(28, 28), cmap = plt.cm.Greys)\n", + " plt.text(x=10, y=-10, s=y_train[i], fontsize=18)\n", + " plt.imshow(X_train[i].reshape(28, 28), cmap=plt.cm.Greys)\n", "plt.show()" ] }, @@ -264,7 +264,7 @@ "ds = ws.get_default_datastore()\n", "print(ds.datastore_type, ds.account_name, ds.container_name)\n", "\n", - "ds.upload(src_dir = './data', target_path = 'mnist', overwrite = True, show_progress = True)" + "ds.upload(src_dir='./data', target_path='mnist', overwrite=True, show_progress=True)" ] }, { @@ -339,7 +339,7 @@ "source": [ "import os\n", "script_folder = './sklearn-mnist'\n", - "os.makedirs(script_folder, 
exist_ok = True)" + "os.makedirs(script_folder, exist_ok=True)" ] }, { @@ -371,8 +371,8 @@ "\n", "# let user feed in 2 parameters, the location of the data files (from datastore), and the regularization rate of the logistic regression model\n", "parser = argparse.ArgumentParser()\n", - "parser.add_argument('--data-folder', type = str, dest = 'data_folder', help = 'data folder mounting point')\n", - "parser.add_argument('--regularization', type = float, dest = 'reg', default = 0.01, help = 'regularization rate')\n", + "parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')\n", + "parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regularization rate')\n", "args = parser.parse_args()\n", "\n", "data_folder = os.path.join(args.data_folder, 'mnist')\n", @@ -389,25 +389,23 @@ "# get hold of the current run\n", "run = Run.get_submitted_run()\n", "\n", - "# train a logistic regression model with specified regularization rate\n", "print('Train a logistic regression model with regularizaion rate of', args.reg)\n", - "clf = LogisticRegression(C = 1.0/args.reg, random_state = 42)\n", + "clf = LogisticRegression(C=1.0/args.reg, random_state=42)\n", "clf.fit(X_train, y_train)\n", "\n", "print('Predict the test set')\n", - "# predict on the test set\n", "y_hat = clf.predict(X_test)\n", "\n", "# calculate accuracy on the prediction\n", "acc = np.average(y_hat == y_test)\n", "print('Accuracy is', acc)\n", "\n", - "# log regularization rate and accuracy \n", "run.log('regularization rate', np.float(args.reg))\n", "run.log('accuracy', np.float(acc))\n", "\n", - "os.makedirs('outputs', exist_ok = True)\n", - "joblib.dump(value = clf, filename = 'outputs/sklearn_mnist_model.pkl')" + "os.makedirs('outputs', exist_ok=True)\n", + "# note file saved in the outputs folder is automatically uploaded into experiment record\n", + "joblib.dump(value=clf, filename='outputs/sklearn_mnist_model.pkl')" ] }, { @@ -417,7 +415,7 @@ "Notice how the script gets data and saves models:\n", "\n", "+ The training script reads an argument to find the directory containing the data. When you submit the job later, you point to the datastore for this argument:\n", - "`parser.add_argument('--data-folder', type = str, dest = 'data_folder', help = 'data directory mounting point')`" + "`parser.add_argument('--data-folder', type=str, dest='data_folder', help='data directory mounting point')`" ] }, { @@ -426,7 +424,7 @@ "source": [ "\n", "+ The training script saves your model into a directory named outputs.
\n", - "`joblib.dump(value = clf, filename = 'outputs/sklearn_mnist_model.pkl')`
\n", + "`joblib.dump(value=clf, filename='outputs/sklearn_mnist_model.pkl')`
\n", "Anything written in this directory is automatically uploaded into your workspace. You'll access your model from this directory later in the tutorial." ] },
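Because everything under `outputs` is captured in the run record, the model can be pulled back out of run history afterwards. A minimal sketch, assuming a completed `run` object from this experiment; `get_file_names` and `download_file` are the run-record accessors in this SDK, though treat the exact signatures as assumptions.

```python
# Sketch only: list the artifacts the run uploaded from ./outputs, then
# download the pickled model into the local working directory.
print(run.get_file_names())  # expect something like ['outputs/sklearn_mnist_model.pkl', ...]
run.download_file(name='outputs/sklearn_mnist_model.pkl', output_file_path='./model.pkl')
```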
@@ -477,11 +475,11 @@ "    '--regularization': 0.8\n", "}\n", "\n", - "est = Estimator(source_directory = script_folder,\n", - "                script_params = script_params,\n", - "                compute_target = compute_target,\n", - "                entry_script = 'train.py',\n", - "                conda_packages = ['scikit-learn'])" + "est = Estimator(source_directory=script_folder,\n", + "                script_params=script_params,\n", + "                compute_target=compute_target,\n", + "                entry_script='train.py',\n", + "                conda_packages=['scikit-learn'])" ] }, {
@@ -562,7 +560,7 @@ "metadata": {}, "outputs": [], "source": [ - "run.wait_for_completion(show_output = True) # specify True for a verbose log" + "run.wait_for_completion(show_output=True) # specify True for a verbose log" ] }, {
@@ -623,7 +621,7 @@ "outputs": [], "source": [ "# register model \n", - "model = run.register_model(model_name = 'sklearn_mnist', model_path = 'outputs/sklearn_mnist_model.pkl')\n", + "model = run.register_model(model_name='sklearn_mnist', model_path='outputs/sklearn_mnist_model.pkl')\n", "print(model.name, model.id, model.version, sep = '\t')" ] },
diff --git a/tutorials/02.deploy-models.ipynb b/tutorials/02.deploy-models.ipynb index 7fc4e1a14..616f48e0f 100644 --- a/tutorials/02.deploy-models.ipynb +++ b/tutorials/02.deploy-models.ipynb
@@ -34,7 +34,45 @@ "\n", "Complete the model training in the [Tutorial #1: Train an image classification model with Azure Machine Learning](01.train-models.ipynb) notebook. \n", "\n", + "If you did NOT complete the tutorial, you can instead run this cell to create a model and download the data needed for this tutorial:" ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# These prerequisites are created in the training tutorial\n", + "# Feel free to skip this cell if you completed the training tutorial \n", + "\n", + "# register a model\n", + "from azureml.core import Workspace\n", + "ws = Workspace.from_config()\n", + "\n", + "from azureml.core.model import Model\n", + "\n", + "model_name = \"sklearn_mnist\"\n", + "model = Model.register(model_path=\"sklearn_mnist_model.pkl\",\n", + "                        model_name=model_name,\n", + "                        tags={\"data\": \"mnist\", \"model\": \"classification\"},\n", + "                        description=\"MNIST handwriting recognition\",\n", + "                        workspace=ws)\n", + "\n", + "# download test data\n", + "import os\n", + "import urllib.request\n", + "\n", + "os.makedirs('./data', exist_ok=True)\n", "\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename='./data/test-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename='./data/test-labels.gz')" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up the environment\n", "\n", "Start by setting up a testing environment.\n",
@@ -113,9 +151,8 @@ "from utils import load_data\n", "\n", "# note we also shrink the intensity values (X) from 0-255 to 0-1. 
This helps the neural network converge faster\n", - "\n", "X_test = load_data('./data/test-images.gz', False) / 255.0\n", - "y_test = load_data('./data/test-labels.gz', True).reshape(-1)\n" + "y_test = load_data('./data/test-labels.gz', True).reshape(-1)" ] }, {
@@ -175,13 +212,14 @@ "metadata": {}, "outputs": [], "source": [ - "row_sums = conf_mx.sum(axis = 1, keepdims = True)\n", + "# normalize the diagonal cells so that they don't overpower the rest of the cells when visualized\n", + "row_sums = conf_mx.sum(axis=1, keepdims=True)\n", "norm_conf_mx = conf_mx / row_sums\n", "np.fill_diagonal(norm_conf_mx, 0)\n", "\n", - "fig = plt.figure(figsize = (8,5))\n", + "fig = plt.figure(figsize=(8,5))\n", "ax = fig.add_subplot(111)\n", - "cax = ax.matshow(norm_conf_mx, cmap = plt.cm.bone)\n", + "cax = ax.matshow(norm_conf_mx, cmap=plt.cm.bone)\n", "ticks = np.arange(0, 10, 1)\n", "ax.set_xticks(ticks)\n", "ax.set_yticks(ticks)\n",
@@ -232,12 +270,11 @@ "from sklearn.externals import joblib\n", "from sklearn.linear_model import LogisticRegression\n", "\n", - "#from azureml.assets.persistence.persistence import get_model_path\n", "from azureml.core.model import Model\n", "\n", "def init():\n", "    global model\n", - "    # retreive the local path to the model using the model name\n", + "    # retrieve the path to the model file using the model name\n", "    model_path = Model.get_model_path('sklearn_mnist')\n", "    model = joblib.load(model_path)\n", "\n",
@@ -263,16 +300,29 @@ "metadata": {}, "outputs": [], "source": [ - "%%writefile myenv.yml\n", - "name: myenv\n", - "channels:\n", - "  - defaults\n", - "dependencies:\n", - "  - scikit-learn\n", - "  - pip:\n", - "    # Required packages for AzureML execution, history, and data preparation.\n", - "    - --extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\n", - "    - azureml-core" + "from azureml.core.conda_dependencies import CondaDependencies \n", + "\n", + "myenv = CondaDependencies()\n", + "myenv.add_conda_package(\"scikit-learn\")\n", + "\n", + "with open(\"myenv.yml\",\"w\") as f:\n", + "    f.write(myenv.serialize_to_string())" ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Review the content of the file:" ] }, { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pycat myenv.yml" ] }, {
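One note on the new `CondaDependencies` cell above: the same object can also carry pip packages alongside conda ones, which matters once the scoring script grows extra dependencies. A minimal sketch; the `numpy` dependency is hypothetical, purely for illustration.

```python
# Sketch only: build the same environment specification with an extra pip dependency.
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies()
myenv.add_conda_package("scikit-learn")
myenv.add_pip_package("numpy")  # hypothetical extra dependency, for illustration

# serialize_to_string() renders the same yml content written to myenv.yml above
print(myenv.serialize_to_string())
```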
@@ -281,7 +331,7 @@ "source": [ "### Create configuration file\n", "\n", - "Create a deployment configuration file and specify the number of CPUs and gigabyte of RAM needed for your ACI container. While it depends on your model, the default of 1 core and 1 gigabyte of RAM is usually sufficient for many models. If you feel you need more later, you would have to recreate the image and redeploy the service." + "Create a deployment configuration file and specify the number of CPU cores and gigabytes of RAM needed for your ACI container. While the right size depends on your model, the default of 1 core and 1 gigabyte of RAM is sufficient for many models. If you feel you need more later, you can always modify the configuration and redeploy the service." ] }, {
@@ -292,10 +342,10 @@ "source": [ "from azureml.core.webservice import AciWebservice\n", "\n", - "aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n", - "                                               memory_gb = 1, \n", - "                                               tags = {\"data\": \"MNIST\", \"method\" : \"sklearn\"}, \n", - "                                               description = 'Predict MNIST with sklearn')" + "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", + "                                               memory_gb=1, \n", + "                                               tags={\"data\": \"MNIST\", \"method\": \"sklearn\"}, \n", + "                                               description='Predict MNIST with sklearn')" ] }, {
@@ -328,17 +378,17 @@ "from azureml.core.image import ContainerImage\n", "\n", "# configure the image\n", - "image_config = ContainerImage.image_configuration(execution_script = \"score.py\", \n", - "                                                  runtime = \"python\", \n", - "                                                  conda_file = \"myenv.yml\")\n", + "image_config = ContainerImage.image_configuration(execution_script=\"score.py\", \n", + "                                                  runtime=\"python\", \n", + "                                                  conda_file=\"myenv.yml\")\n", "\n", - "service = Webservice.deploy_from_model(workspace = ws,\n", - "                                       name = 'sklearn-mnist-model',\n", - "                                       deployment_config = aciconfig,\n", - "                                       models = [model],\n", - "                                       image_config = image_config)\n", + "service = Webservice.deploy_from_model(workspace=ws,\n", + "                                       name='sklearn-mnist-model',\n", + "                                       deployment_config=aciconfig,\n", + "                                       models=[model],\n", + "                                       image_config=image_config)\n", "\n", - "service.wait_for_deployment(show_output = True)" + "service.wait_for_deployment(show_output=True)" ] }, {
@@ -391,7 +441,7 @@ "test_samples = bytes(test_samples, encoding = 'utf8')\n", "\n", "# predict using the deployed model\n", - "result = json.loads(service.run(input_data = test_samples))\n", + "result = json.loads(service.run(input_data=test_samples))\n", "\n", "# compare actual value vs. the predicted values:\n", "i = 0\n",
@@ -406,8 +456,8 @@ "    font_color = 'red' if y_test[s] != result[i] else 'black'\n", "    clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n", "    \n", - "    plt.text(x = 10, y = -10, s = result[i], fontsize = 18, color = font_color)\n", - "    plt.imshow(X_test[s].reshape(28, 28), cmap = clr_map)\n", + "    plt.text(x=10, y=-10, s=result[i], fontsize=18, color=font_color)\n", + "    plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n", "    \n", "    i = i + 1\n", "plt.show()"
diff --git a/tutorials/sklearn_mnist_model.pkl b/tutorials/sklearn_mnist_model.pkl new file mode 100644 index 000000000..135dd09ec Binary files /dev/null and b/tutorials/sklearn_mnist_model.pkl differ
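A closing note on testing the deployment: besides `service.run(...)`, the ACI service exposes a plain HTTP scoring endpoint, so any client can exercise it without the SDK. A minimal sketch, assuming the `service` object deployed above and the `test_samples` JSON payload built in the prediction cell; `service.scoring_uri` is the endpoint attribute this SDK exposes.

```python
# Sketch only: POST the same JSON payload straight to the scoring endpoint.
import requests

headers = {'Content-Type': 'application/json'}
resp = requests.post(service.scoring_uri, test_samples, headers=headers)
print("POSTed to", service.scoring_uri)
print("prediction:", resp.text)
```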