# Fashion MNIST

## Validate Azure ML SDK installation and get version number for debugging purposes

In [None]:
# Report the installed Azure ML core SDK version (handy when debugging)
import azureml.core

sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

## Initialize Workspace
Initialize a workspace object from persisted configuration.

In [None]:
# Load the workspace from the persisted configuration and echo its identity
from azureml.core import Workspace

ws = Workspace.from_config()

# Each row is printed as "<label> <value>", one line per workspace property
for label, value in (
    ("Resource group: ", ws.resource_group),
    ("Location: ", ws.location),
    ("Workspace name: ", ws.name),
):
    print(label, value)

## Create a project directory
Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on.

In [None]:
import os
import shutil

# Folder holding everything that is shipped to the remote compute for training
project_folder = '../projects/fashion_mnist_remote_vm'
training_script = './scripts/train_Fashion_MNIST.py'

os.makedirs(project_folder, exist_ok=True)

# Place a copy of the training script inside the project folder
shutil.copy(training_script, project_folder)

## Create An Experiment
An **Experiment** is a logical container in an Azure ML Workspace. It hosts run records, which can include run metrics and output artifacts from your experiments.

In [None]:
from azureml.core import Experiment

# Runs submitted later in this notebook are recorded under this experiment name
experiment_name = 'fashion-mnist'
experiment = Experiment(
    workspace=ws,
    name=experiment_name,
)

## Create Azure ML Compute Instance
**Note:** Use a compute instance as your fully configured and managed development environment in the cloud. For development and testing, you can also use the instance as a training compute target or for an inference target. A compute instance can run multiple jobs in parallel and has a job queue. As a development environment, a compute instance cannot be shared with other users in your workspace.

In [None]:
from azureml.core.compute import ComputeTarget, ComputeInstance
from azureml.core.compute_target import ComputeTargetException

# Compute instance name should be unique across the azure region, so it is
# derived from the workspace id: "ci" + id, cut to 10 characters in total.
compute_name = f"ci{ws._workspace_id}"[:10]

try:
    # Attach to the instance if one with this name already exists
    instance = ComputeInstance(workspace=ws, name=compute_name)
except ComputeTargetException:
    # Lookup failed: provision a new instance and wait until it is ready
    compute_config = ComputeInstance.provisioning_configuration(
        vm_size='STANDARD_D3_V2',
        ssh_public_access=False,
        # Optional settings, left disabled:
        # vnet_resourcegroup_name='<my-resource-group>',
        # vnet_name='<my-vnet-name>',
        # subnet_name='default',
        # admin_user_ssh_public_key='<my-sshkey>'
    )
    instance = ComputeInstance.create(ws, compute_name, compute_config)
    instance.wait_for_completion(show_output=True)
else:
    print('Found existing instance, use it.')

In [None]:
# Bring the ComputeInstance target back up when it is currently stopped
instance_state = instance.get_status().state
if instance_state == "Stopped":
    instance.start(wait_for_completion=True, show_output=True)

## Create a Training Environment

In [None]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Collect the packages the training run needs: one from conda, one from pip
conda_dep = CondaDependencies()
conda_dep.add_conda_package("matplotlib=3.3.3")
conda_dep.add_pip_package("tensorflow-gpu==2.4.1")

# Attach the dependencies to the PythonSection of a named environment
myenv = Environment(name="mytfenv")
myenv.python.conda_dependencies = conda_dep

# Register the environment with the workspace
myenv.register(workspace=ws)

In [None]:
# OPTIONAL: Pre-Build Docker Image for Training
# NOTE(review): presumably building here keeps the image build from delaying
# the first submitted run — confirm.
# NOTE(review): `Image` is imported but not referenced anywhere in this cell.
from azureml.core import Image
# Kick off a build of the environment defined above in the workspace
build = myenv.build(workspace=ws)
# Block until the build finishes, with output enabled
build.wait_for_completion(show_output=True)

## Upload Data to Azure
1. Download the [Fashion MNIST Dataset](https://github.com/zalandoresearch/fashion-mnist) from GitHub
1. Upload the files to the Azure ML Default DataStore

In [None]:
import os
import urllib.request

# Local folder that receives the four Fashion MNIST archives
data_folder = '../data/fashion_mnist'
os.makedirs(data_folder, exist_ok=True)

# All archives live under the same path of the dataset's GitHub repository
base_url = 'https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion/'

# (file name in the repository, shorter local file name)
archives = (
    ('train-images-idx3-ubyte.gz', 'train-images.gz'),
    ('train-labels-idx1-ubyte.gz', 'train-labels.gz'),
    ('t10k-images-idx3-ubyte.gz', 'test-images.gz'),
    ('t10k-labels-idx1-ubyte.gz', 'test-labels.gz'),
)

# Download each archive in turn; a failed download raises and stops the cell
for remote_name, local_name in archives:
    urllib.request.urlretrieve(base_url + remote_name,
                               filename=os.path.join(data_folder, local_name))

In [None]:
# Use the workspace's default datastore as the upload destination
ds = ws.get_default_datastore()

print("Datastore details:")
for caption, detail in (
    ("Type:", ds.datastore_type),
    ("Storage Account:", ds.account_name),
    ("Blob Container Name:", ds.container_name),
):
    print(caption, detail)

# Push the downloaded archives to the 'fashion_mnist' path of the datastore,
# replacing any files already stored there
ds.upload(
    src_dir=data_folder,
    target_path='fashion_mnist',
    overwrite=True,
    show_progress=True,
)

## Submit the Experiment
Finally, run the training job on the compute instance created above.

In [None]:
from azureml.core import ScriptRunConfig

# One mount reference to the default datastore, reused for both the script
# argument and the run configuration entry below
data_mount = ds.as_mount()

# Command-line arguments handed to the training script; the string form of
# the mount reference is passed as the data folder
script_params = [
    '--data-folder', str(data_mount),
    '--batch-size', 128,
    '--epochs', 24,
]

runconfig = ScriptRunConfig(
    source_directory=project_folder,
    script='train_Fashion_MNIST.py',
    arguments=script_params,
    compute_target=instance,
    environment=myenv,
)

# Declare the mount in the run configuration, keyed by its reference name
runconfig.run_config.data_references = {
    data_mount.data_reference_name: data_mount.to_config(),
}

In [None]:
# Submit the configured job to the experiment and label the resulting run
run_description = "Compute Instance trained Fashion MNIST model"

run = experiment.submit(runconfig)
run.tag("Description", run_description)

In [None]:
# Evaluate the bare run object so the notebook renders it as this cell's output
run

In [None]:
from azureml.widgets import RunDetails

# Build the details widget for the submitted run, then display it
details_widget = RunDetails(run)
details_widget.show()

In [None]:
# Block until the submitted run finishes, with output enabled, so that the
# metrics and files read by the following cells are available
run.wait_for_completion(show_output=True)

## Show Metrics

In [None]:
# Get all metrics logged in the run. The service is queried once; the result
# is reused for the summary below and by the plotting cell that follows.
metrics = run.get_metrics()

import numpy as np

# NOTE(review): the float format specs require 'loss' and 'accuracy' to be
# single numbers — presumably each is logged once by the training script;
# confirm against train_Fashion_MNIST.py.
print('loss is {0:.2f}, and accuracy is {1:0.2f}'.format(
    metrics['loss'],
    metrics['accuracy']
))

In [None]:
# Compare training and validation accuracy across epochs on a single chart
import numpy as np
import matplotlib.pyplot as plt

training_accuracy = metrics['Training Accuracy']
validation_accuracy = metrics['Validation Accuracy']

# x axis values [1, 2, ..., N], one per logged training-accuracy value
epoch_list = list(range(1, len(training_accuracy) + 1))

plt.plot(
    epoch_list, training_accuracy,
    epoch_list, validation_accuracy,
)
plt.legend(('Training Accuracy', 'Validation Accuracy'))
plt.show()

## Download Model

In [None]:
# show all the files stored within the run record
# (the download cell further down filters this same listing by the 'outputs' prefix)
run.get_file_names()

In [None]:
import os

# Local folder that receives the files the run stored under 'outputs'
outputs_path = os.path.join(project_folder, "outputs")
os.makedirs(outputs_path, exist_ok=True)

for filename in run.get_file_names():
    # Skip everything the run did not store under the 'outputs' prefix
    if not filename.startswith('outputs'):
        continue
    # Give download_file an explicit destination *file* path inside
    # outputs_path instead of relying on how the SDK treats a directory
    # argument; each artifact is saved under its base name.
    local_path = os.path.join(outputs_path, os.path.basename(filename))
    print("Downloading " + filename)
    run.download_file(filename, output_file_path=local_path)

## Clean up compute resource

In [None]:
# Clean up: delete the compute instance and wait for the operation to finish.
# NOTE(review): to keep the instance for later use, the commented-out stop
# call below looks like the intended alternative to deleting it — confirm.
# instance.stop(wait_for_completion=True, show_output=True)
instance.delete(wait_for_completion=True, show_output=True)