Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/CMK8s-Samples/sample_notebooks/002%20single%20node%20job/sklearn/sklearn.png)

# Sklearn
In this notebook, you will submit a job that runs the scikit-learn GaussianNB algorithm on a CMAKS compute target.

## Prerequisites
* Go through the [configuration notebook](../../../configuration.ipynb) to:
    * install the AML SDK
    ``` bash
    pip install --upgrade azureml-sdk
    ```
    * create a workspace and its configuration file (`config.json`)
* install the CMAKS SDK
    ``` bash
    pip install azureml-contrib-k8s --extra-index-url https://azuremlsdktestpypi.azureedge.net/CmAks-Compute-Test/D58E86006C65
    ```

## Set up
### Config workspace

In [None]:
from azureml.core.workspace import Workspace

# Load the workspace via Workspace.from_config() and echo its identifying
# details, one per line.
ws = Workspace.from_config()
for detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(detail)

### Define variables of compute context and attach CMAKS compute

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute

In [None]:
from azureml.contrib.core.compute.cmakscompute import CmAksCompute
# will raise an exception about win32 on the first run

In [None]:
## define variables needed
# cluster_name = <cluster_name> # AKS cluster name in the same subscription
# resource_group = <resource_group> # resource group of this AKS cluster
# node_pool = <node_pool> # node pool of this AKS cluster to attach to this workspace
# compute_name = <compute_name> # compute name

In [None]:
# attach_config = CmAksCompute.attach_configuration(node_pool=<node_pool>
#                                                  , resource_group =<resource_group>
#                                                  , cluster_name = <cluster_name>
#                                                 ) 

In [None]:
# cmaks_target = CmAksCompute.attach(ws, compute_name, attach_config)

In [None]:
# Confirm the attach by printing each entry of the workspace's compute targets.
print("compute targets after attach:\n")
for target_name in ws.compute_targets:
    print(target_name)

## Train model on the CMAKS compute
### Specify existing CMAKS compute

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.contrib.core.compute.cmakscompute import CmAksCompute

# Name of the existing CMAKS compute to run on.
compute_name = "gpu-eastus-0806"
# Look that compute target up by name in the workspace.
cmaks_compute = ComputeTarget(name=compute_name, workspace=ws)

### Create a project directory and prepare scripts

In [None]:
# Source directory submitted with the run, and the script inside it to execute.
project_folder, script = ".", "sklearn-nb.py"

### Define your Environment

In [None]:
from azureml.core import Environment

# Build the environment from the conda specification file, then enable Docker
# for it.
myenv = Environment.from_conda_specification(
    name="myenv",
    file_path="sklearn-env.yml",
)
myenv.docker.enabled = True

### Create a ScriptRunConfig

In [None]:
from azureml.core import ScriptRunConfig
from azureml.core import RunConfiguration
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# create a new runconfig object
run_config = RunConfiguration()
# Set the environment FIRST: assigning run_config.environment replaces the
# whole environment object, so any Docker settings applied before this
# assignment would be silently discarded.
run_config.environment = myenv
# enable Docker
run_config.environment.docker.enabled = True
# set the Docker base image
# (alternative: run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE)
run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:0.2.1"
# Set compute target to the one created in previous step
run_config.target = cmaks_compute

# Bundle the source directory, entry script and run configuration into one
# submittable object.
src = ScriptRunConfig(source_directory=project_folder, script=script, run_config=run_config)

### Create an experiment to track all the runs in your workspace

In [None]:
from azureml.core import Experiment

# Experiment under which the runs submitted from this notebook are tracked.
experiment_name = "sklearn-nb"
experiment = Experiment(name=experiment_name, workspace=ws)

### Submit your run

In [None]:
# Submit the script run configuration, then block until the run finishes while
# showing its output.
run = experiment.submit(src)
run.wait_for_completion(show_output=True)

## Pipeline
You can also run the training script as a step in a pipeline.

In [None]:
### Create pipeline
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
# create a new runconfig object
run_config = RunConfiguration()
# enable Docker 
run_config.environment.docker.enabled = True
# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False
# specify CondaDependencies obj
run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
# Wrap the training script as a single pipeline step on the CMAKS compute.
sk_step = PythonScriptStep( name="train_step",
                            script_name=script,
                            compute_target=cmaks_compute,
                            source_directory=project_folder,
                            allow_reuse=True,
                            runconfig=run_config
                            )
print("sk_step created")
steps = [sk_step]

### Build and validate the pipeline
from azureml.pipeline.core import Pipeline

# Pass the list of steps (not the bare step object): `steps` was built above
# for exactly this purpose and was previously left unused.
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline.validate()
print("Pipeline validation completed")

### Submit the pipeline
pipeline_run = experiment.submit(pipeline, regenerate_outputs=False)
print("Pipeline is submitted for execution")

# Show the run-details widget for the submitted pipeline run.
from azureml.widgets import RunDetails
RunDetails(pipeline_run).show()