# Fashion MNIST

## Validate Azure ML SDK installation and get version number for debugging purposes

In [None]:
# Report the installed Azure ML SDK version so it can be quoted when debugging.
import azureml.core

print("SDK version: {}".format(azureml.core.VERSION))

## Initialize Workspace
Initialize a workspace object from persisted configuration.

In [None]:
# Load the workspace from the persisted configuration and echo its identity.
from azureml.core import Workspace

ws = Workspace.from_config()

# One (label, value) pair per line of output.
for label, value in (
    ("Resource group: ", ws.resource_group),
    ("Location: ", ws.location),
    ("Workspace name: ", ws.name),
):
    print(label, value)

## Create a project directory
Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on.

In [None]:
import os
import shutil

# Local staging directory; it is later passed as the source directory of the run.
project_folder = '../projects/fashion_mnist_remote_vm'
os.makedirs(project_folder, exist_ok=True)

# Stage the training script. Copy any additional files it depends on the same way.
shutil.copy(
    src='./scripts/train_Fashion_MNIST.py',
    dst=project_folder,
)

## Create An Experiment
An **Experiment** is a logical container in an Azure ML Workspace. It hosts run records, which can include run metrics and output artifacts from your experiments.

In [None]:
from azureml.core import Experiment

# Name under which this notebook's runs are grouped in the workspace.
experiment_name = 'fashion-mnist'
experiment = Experiment(name=experiment_name, workspace=ws)

## Create Linux DSVM as a compute target
**Note:** If creation fails with a message about Marketplace purchase eligibility, go to portal.azure.com, start creating a DSVM there, and select "Want to create programmatically" to enable programmatic creation. Once you've enabled it, you can exit without actually creating the VM.

**Note:** By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can specify the port number in the provisioning configuration object.

**Note:** To streamline the compute that Azure Machine Learning creates, we are making updates to support creating only single to multi-node AmlCompute. The DSVMCompute class will be deprecated in a later release, but the DSVM can be created using the single-line command below and then attached (like any VM) using the sample code below. Also note that we only support Linux VMs, and the commands below will spin up a Linux VM only.

```
# create a DSVM in your resource group
# note you need to be at least a contributor to the resource group in order to execute this command successfully.
(myenv) $ az vm create --resource-group <resource_group_name> --name <some_vm_name> --image microsoft-dsvm:linux-data-science-vm-ubuntu:linuxdsvmubuntu:latest --admin-username <username> --admin-password <password> --generate-ssh-keys --authentication-type password
```

**Note:** You can also use [this url](https://portal.azure.com/#create/microsoft-dsvm.linux-data-science-vm-ubuntulinuxdsvmubuntu) to create the VM using the Azure Portal.

In [None]:
# Legacy provisioning path, deliberately disabled by wrapping it in a string
# literal so that none of it executes. It looks up a DsvmCompute target named
# 'mydsvm' and, if that raises ComputeTargetException, provisions a new
# Standard_D2_v2 DSVM and waits for it. Kept for reference only: the notebook
# instead creates the VM with the Azure CLI and attaches it as a RemoteCompute.
'''
from azureml.core.compute import DsvmCompute
from azureml.core.compute_target import ComputeTargetException

compute_target_name = 'mydsvm'

try:
    dsvm_compute = DsvmCompute(workspace=ws, name=compute_target_name)
    print('found existing:', dsvm_compute.name)
except ComputeTargetException:
    print('creating new.')
    dsvm_config = DsvmCompute.provisioning_configuration(vm_size="Standard_D2_v2")
    dsvm_compute = DsvmCompute.create(ws, name=compute_target_name, provisioning_configuration=dsvm_config)
    dsvm_compute.wait_for_completion(show_output=True)
'''

In [None]:
# Sign in with the Azure CLI so that the `az vm` commands in this notebook can run.
!az login

In [None]:
# Create the Linux DSVM 'mydsvm' in resource group 'tensorflow101' with
# password authentication for the admin account 'tf101'.
# NOTE(review): the admin password is stored here in plain text; anyone who can
# read this notebook (or its version history) can log in to the VM. Replace it
# with your own secret before running and avoid committing it.
!az vm create --resource-group tensorflow101 --name mydsvm --image microsoft-dsvm:linux-data-science-vm-ubuntu:linuxdsvmubuntu:latest --admin-username tf101 --admin-password "ztBV8C4}3b@c" --generate-ssh-keys --authentication-type password

## Attach an existing Linux DSVM
You can also attach an existing Linux VM as a compute target. The default port is 22.

In [None]:
import getpass
import os

from azureml.core.compute import RemoteCompute, ComputeTarget

# Do not keep VM credentials in the notebook source: take the admin password
# from the DSVM_ADMIN_PASSWORD environment variable, or prompt for it without
# echoing when the variable is unset or empty. It must be the password the
# VM's admin account was created with.
dsvm_password = os.environ.get("DSVM_ADMIN_PASSWORD") or getpass.getpass(
    "Admin password for the DSVM: ")

# SSH connection details of the existing VM.
attach_config = RemoteCompute.attach_configuration(
    address="<public ip of the previous step>",  # replace with the VM's public IP
    ssh_port=22,  # default SSH port; change it if the VM listens elsewhere
    username="tf101",
    password=dsvm_password
)

# Register the VM in the workspace as the compute target named "mydsvm".
dsvm_compute = ComputeTarget.attach(ws, "mydsvm", attach_config)

## Check VM state
Check if the remote VM is ready to receive tasks.

In [None]:
import time

# Provisioning states after which the status can no longer change. 'Canceled'
# is included so that a cancelled attach operation cannot keep the loop
# spinning forever.
TERMINAL_STATES = ('Succeeded', 'Failed', 'Canceled')

# Poll the compute target every 10 seconds until it reaches a terminal state.
status = dsvm_compute.get_status()
while status not in TERMINAL_STATES:
    print('current status: {} - waiting...'.format(status))
    time.sleep(10)
    status = dsvm_compute.get_status()

# Report how provisioning ended so that a failure is not silently ignored.
print('final status: {}'.format(status))

## Configure for using a Remote VM

In [None]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# Conda packages (with pinned versions) to install for the training run.
pinned_conda_packages = [
    'tensorflow=1.10.0',
    'keras=2.1.5',
    'matplotlib=3.0.1',
    'pyparsing=2.2.2',
]
conda_dependencies = CondaDependencies.create(conda_packages=pinned_conda_packages)

# Python run configuration that targets the attached Linux DSVM by name.
run_config = RunConfiguration(framework="python")
run_config.target = dsvm_compute.name
run_config.environment.python.conda_dependencies = conda_dependencies

## Submit the Experiment
Finally, run the training job on the DSVM.

In [None]:
from azureml.core.script_run_config import ScriptRunConfig

# Bundle the staged project folder, the entry script and the run configuration.
script_run_config = ScriptRunConfig(
    source_directory=project_folder,
    script='train_Fashion_MNIST.py',
    run_config=run_config,
)

# Submit the job to the experiment and tag the resulting run with a description.
run = experiment.submit(script_run_config)
run.tag("Description", "Remote VM trained Fashion MNIST model")

In [None]:
# Leaving the run object as the cell's final expression makes the notebook display it.
run

In [None]:
# Wait for the submitted run to finish; show_output=True requests that the
# run's output be shown while waiting.
run.wait_for_completion(show_output=True)

## Show Metrics

In [None]:
import numpy as np

# Get all metrics logged in the run. Fetch them once and reuse the result;
# a second call whose result is discarded only costs an extra service round trip.
metrics = run.get_metrics()

# NOTE(review): assumes 'loss' and 'accuracy' were each logged as a single
# number by the training script; confirm against train_Fashion_MNIST.py.
print('loss is {0:.2f}, and accuracy is {1:0.2f}'.format(
    metrics['loss'],
    metrics['accuracy']
))

In [None]:
# Compare training and validation accuracy across epochs.
import numpy as np
import matplotlib.pyplot as plt

series_names = ('Training Accuracy', 'Validation Accuracy')
curves = [metrics[name] for name in series_names]

# x axis values: [1, 2, ..., number of epochs]
epoch_list = list(range(1, len(curves[0]) + 1))

# Draw one line per series, in the same order as the legend entries.
for curve in curves:
    plt.plot(epoch_list, curve)
plt.legend(series_names)
plt.show()

## Download Model

In [None]:
# Show the names of all the files stored within the run record; as the cell's
# final expression, the result is displayed by the notebook.
run.get_file_names()

In [None]:
import os
import shutil

# Single source of truth for where the downloaded archive lives locally.
# Passing the full file path (rather than just the directory) makes the
# download destination explicit, so the unpack and cleanup steps below are
# guaranteed to refer to the same file.
archive_path = '../outputs/saved_model.tar.gz'
os.makedirs(os.path.dirname(archive_path), exist_ok=True)
run.download_file('outputs/saved_model.tar.gz', output_file_path=archive_path)

# Unpack into a per-run folder named after the second '_'-separated field of
# the run id.
model_dir = './models/fashion_mnist/' + run.id.split('_')[1]
shutil.unpack_archive(archive_path, model_dir)

# The archive is no longer needed once its contents are extracted.
os.remove(archive_path)

## Clean up compute resource

In [None]:
# Detach the compute target from the workspace. The VM itself is deleted
# separately with the Azure CLI.
dsvm_compute.detach()

In [None]:
# Delete the VM 'mydsvm' from resource group 'tensorflow101'; --yes skips the
# confirmation prompt.
# NOTE(review): resources created alongside the VM (disks, NIC, public IP) may
# remain after this command - confirm and clean them up if needed.
!az vm delete --resource-group tensorflow101 --name mydsvm --yes