Azure · rastala · Sep 14, 2018 · Sep 14, 2018
diff --git a/00.Getting Started/01.train-within-notebook/01.train-within-notebook.ipynb b/00.Getting Started/01.train-within-notebook/01.train-within-notebook.ipynb
diff --git a/00.Getting Started/02.train-on-local/02.train-on-local.ipynb b/00.Getting Started/02.train-on-local/02.train-on-local.ipynb
diff --git a/00.Getting Started/02.train-on-local/train.py b/00.Getting Started/02.train-on-local/train.py
@@ -0,0 +1,45 @@
+from sklearn.datasets import load_diabetes
+from sklearn.linear_model import Ridge
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from azureml.core.run import Run
+from sklearn.externals import joblib
+
+import numpy as np
+
+# os.makedirs('./outputs', exist_ok = True)
+
+X, y = load_diabetes(return_X_y=True)
+
+run = Run.get_submitted_run()
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+data = {"train": {"X": X_train, "y": y_train},
+        "test": {"X": X_test, "y": y_test}}
+
+# list of numbers from 0.0 to 1.0 with a 0.05 interval
+alphas = np.arange(0.0, 1.0, 0.05)
+
+for alpha in alphas:
+    # Use Ridge algorithm to create a regression model
+    reg = Ridge(alpha=alpha)
+    reg.fit(data["train"]["X"], data["train"]["y"])
+
+    preds = reg.predict(data["test"]["X"])
+    mse = mean_squared_error(preds, data["test"]["y"])
+    run.log('alpha', alpha)
+    run.log('mse', mse)
+
+    model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
+    # save model in the outputs folder so it automatically get uploaded
+    with open(model_file_name, "wb") as file:
+        joblib.dump(value=reg, filename=model_file_name)
+
+    # upload the model file explicitly into artifacts
+    run.upload_file(name=model_file_name, path_or_stream=model_file_name)
+
+    # register the model
+    # commented out for now until a bug is fixed
+    # run.register_model(file_name = model_file_name)
+
+    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
diff --git a/00.Getting Started/03.train-on-aci/03.train-on-aci.ipynb b/00.Getting Started/03.train-on-aci/03.train-on-aci.ipynb
@@ -0,0 +1,342 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Copyright (c) Microsoft Corporation. All rights reserved.\n",
+    "\n",
+    "Licensed under the MIT License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 03. Train on Azure Container Instance (EXPERIMENTAL)\n",
+    "\n",
+    "* Create Workspace\n",
+    "* Create an Experiment\n",
+    "* Create `train.py` in the script folder.\n",
+    "* Configure an ACI (Azure Container Instance) run\n",
+    "* Execute in ACI"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Prerequisites\n",
+    "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) notebook first if you haven't already."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Report the installed azureml-core SDK version\n",
+    "import azureml.core\n",
+    "sdk_version = azureml.core.VERSION\n",
+    "print(\"SDK version:\", sdk_version)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Initialize Workspace\n",
+    "\n",
+    "Initialize a workspace object from persisted configuration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "create workspace"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "from azureml.core import Workspace\n",
+    "# load the workspace from the persisted configuration, then print its identifying fields one per line\n",
+    "ws = Workspace.from_config()\n",
+    "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create An Experiment\n",
+    "\n",
+    "An **Experiment** is a logical container in an Azure ML Workspace. It hosts run records, which can include run metrics and output artifacts from your experiments."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from azureml.core import Experiment\n",
+    "experiment_name = 'train-on-aci'  # name of the experiment that will hold this notebook's runs\n",
+    "experiment = Experiment(workspace=ws, name=experiment_name)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a folder to store the training script."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "script_folder = './samples/train-on-aci'  # train.py is written here by the %%writefile cell below\n",
+    "os.makedirs(script_folder, exist_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Remote execution on ACI\n",
+    "\n",
+    "Use the `%%writefile` magic to write the training code to a `train.py` file under the script folder created above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%writefile $script_folder/train.py\n",
+    "\n",
+    "import os\n",
+    "from sklearn.datasets import load_diabetes\n",
+    "from sklearn.linear_model import Ridge\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from azureml.core.run import Run\n",
+    "from sklearn.externals import joblib\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "os.makedirs('./outputs', exist_ok=True)\n",
+    "\n",
+    "X, y = load_diabetes(return_X_y = True)\n",
+    "\n",
+    "run = Run.get_submitted_run()\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
+    "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
+    "        \"test\": {\"X\": X_test, \"y\": y_test}}\n",
+    "\n",
+    "# alphas 0.00, 0.05, ..., 0.95 (np.arange excludes the 1.0 stop value)\n",
+    "alphas = np.arange(0.0, 1.0, 0.05)\n",
+    "\n",
+    "for alpha in alphas:\n",
+    "    # Use Ridge algorithm to create a regression model\n",
+    "    reg = Ridge(alpha = alpha)\n",
+    "    reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
+    "\n",
+    "    preds = reg.predict(data[\"test\"][\"X\"])\n",
+    "    mse = mean_squared_error(data[\"test\"][\"y\"], preds)\n",
+    "    run.log('alpha', alpha)\n",
+    "    run.log('mse', mse)\n",
+    "\n",
+    "    # save under ./outputs (uploaded with the run automatically); joblib.dump opens the file itself\n",
+    "    model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n",
+    "    joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n",
+    "\n",
+    "    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Configure for using ACI\n",
+    "Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "configure run"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "from azureml.core.runconfig import DEFAULT_CPU_IMAGE, RunConfiguration\n",
+    "from azureml.core.conda_dependencies import CondaDependencies\n",
+    "\n",
+    "# start from a fresh run configuration\n",
+    "run_config = RunConfiguration()\n",
+    "\n",
+    "# select ACI as the target that executes the script\n",
+    "run_config.target = \"containerinstance\"\n",
+    "\n",
+    "# ACI container groups are only supported in certain regions, which can differ from the Workspace region\n",
+    "aci_settings = run_config.container_instance\n",
+    "aci_settings.region = 'eastus'\n",
+    "\n",
+    "# size the container: CPU cores and memory (GB)\n",
+    "aci_settings.cpu_cores = 1\n",
+    "aci_settings.memory_gb = 2\n",
+    "\n",
+    "# enable Docker and use the default CPU-based image as the base image\n",
+    "docker_settings = run_config.environment.docker\n",
+    "docker_settings.enabled = True\n",
+    "docker_settings.base_image = DEFAULT_CPU_IMAGE\n",
+    "# alternative base image: 'microsoft/mmlspark:plus-0.9.9'\n",
+    "\n",
+    "# have a conda environment created in the Docker image for execution,\n",
+    "# built from the CondaDependencies object specified here\n",
+    "python_settings = run_config.environment.python\n",
+    "python_settings.user_managed_dependencies = False\n",
+    "python_settings.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
+    "\n",
+    "# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
+    "run_config.auto_prepare_environment = True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Submit the Experiment\n",
+    "Finally, run the training job on ACI."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "remote run",
+     "aci"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "%%time \n",
+    "from azureml.core.script_run_config import ScriptRunConfig\n",
+    "\n",
+    "# bundle the script folder, the entry script and the ACI run configuration into one submission\n",
+    "script_run_config = ScriptRunConfig(\n",
+    "    source_directory=script_folder, script='train.py', run_config=run_config)\n",
+    "\n",
+    "run = experiment.submit(script_run_config)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "remote run",
+     "aci"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "# Wait for the run to finish; show_output=True shows the run's output on stdout.\n",
+    "run.wait_for_completion(show_output=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "query history"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# Show run details: the bare expression makes the run object this cell's output\n",
+    "\n",
+    "run"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Navigate to the above URL using Chrome, and you should see a graph of alpha values, and a graph of MSE.\n",
+    "\n",
+    "![graphs](../images/mse.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "get metrics"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# get all metrics logged in the run (fetched once), then display them as the cell output\n",
+    "metrics = run.get_metrics()\n",
+    "metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "# index of the smallest logged MSE; the alpha logged at the same position produced it\n",
+    "best_idx = np.argmin(metrics['mse'])\n",
+    "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
+    "    metrics['mse'][best_idx], metrics['alpha'][best_idx]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}