## Imports

In [None]:
# Import the required libraries to use Azure ML
import os
import azureml.core
from azureml.core import Workspace, Datastore, Dataset, Environment
from azureml.data.datapath import DataPath
from azureml.exceptions import UserErrorException

# Report which Azure ML SDK version this notebook is running against
print(f"Currently using version {azureml.core.VERSION} of the Azure ML SDK")

In [None]:
# Import libraries specific to the ML model and code implementation
import mlflow
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

## AzureML Workspace Configuration

In [None]:
# Replace these placeholders with the values that match your Azure resources.
# NOTE: storage_key is a secret; avoid saving or committing the notebook with a real key in it.
subscription_id = "YOUR_SUBSCRIPTION_ID"
resource_group = "YOUR_RESOURCE_GROUP_NAME"
workspace_name = "YOUR_WORKSPACE_NAME"
# Only used as the location when a new workspace has to be created
workspace_region = "YOUR_WORKSPACE_REGION"
# NOTE(review): not referenced by the cells visible in this notebook — confirm it is still needed
container_registry_name = "YOUR_CONTAINTER_REGISTRY_NAME"
# Storage account name passed as `account_name` when the blob datastore is registered
storage_name = "YOUR_STORAGE_ACCOUNT_NAME"
# Name under which the blob datastore is looked up / registered in the workspace
blob_datastore_name = "YOUR_DATASTORE_NAME"
# NOTE(review): not referenced by the cells visible in this notebook — confirm it is still needed
key_vault_name = "YOUR_KEY_VAULT_NAME"
# Blob container passed as `container_name` when the blob datastore is registered
container_name = "YOUR_STORAGE_CONTAINER_NAME"
# Storage account key passed as `account_key` when the blob datastore is registered
storage_key = "YOUR_STORAGE_KEY"

In [None]:
# Connect to the workspace described by the settings above; if that fails,
# create the workspace (and its resource group) with the same settings.
try:
    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Workspace configuration succeeded.")
except Exception as err:
    # Catch `Exception` instead of using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate: interrupting the cell
    # must not silently fall through to creating cloud resources.
    # Surface the reason as well, so authentication or typo problems are visible.
    print(f"Could not load the workspace ({type(err).__name__}: {err}); creating it instead.")
    ws = Workspace.create(name=workspace_name,
                          subscription_id=subscription_id,
                          resource_group=resource_group,
                          location=workspace_region,
                          create_resource_group=True,
                          sku='basic',
                          exist_ok=True)
    ws.get_details()
    print("Workspace not accessible. Created a new workspace")

# TODO: Work with environments
# Build an environment from the pipeline's pip requirements file and register it in the workspace
myenv = Environment.from_pip_requirements(
    name="myenv",
    file_path="../../pipeline/environment/docker-contexts/python-and-pip/requirements.txt",
)
myenv.register(workspace=ws)

# Write the workspace details to a configuration file so that later cells
# can reconnect with Workspace.from_config()
ws.write_config()


In [None]:
# Reconnect to the workspace using the saved configuration file,
# then print its identifying details one per line
ws = Workspace.from_config()
for detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(detail)

## Import data from Azure Blob Storage

In [None]:
# Reuse the datastore if it is already registered in the workspace;
# otherwise register the blob container under that name.
try:
    blob_datastore = Datastore.get(ws, blob_datastore_name)
except UserErrorException:
    blob_datastore = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=blob_datastore_name,
        account_name=storage_name,
        container_name=container_name,
        account_key=storage_key,
    )
    print(f"Registered Blob Datastore with name: {blob_datastore_name}")
else:
    print(f"Found Blob Datastore with name: {blob_datastore_name}")

# Make this datastore the workspace default
ws.set_default_datastore(blob_datastore_name)

# Create tabular datasets from the MNIST CSV files stored in the datastore
train_path = DataPath(blob_datastore, 'mnist_train.csv')
train = Dataset.Tabular.from_delimited_files(train_path)
test_path = DataPath(blob_datastore, 'mnist_test.csv')
test = Dataset.Tabular.from_delimited_files(test_path)


In [None]:
# TODO: Dataset versioning?
# Register both datasets in the workspace; create_new_version=True is passed on every call
train.register(
    workspace=ws,
    name='demo_train_set_mnist',
    description='training dataset',
    create_new_version=True,
)
test.register(
    workspace=ws,
    name='demo_test_set_mnist',
    description='testing dataset',
    create_new_version=True,
)

## Data preparation

In [None]:
# https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb
# Materialize the registered tabular datasets as pandas DataFrames
train_df = train.to_pandas_dataframe()
test_df = test.to_pandas_dataframe()

# Separate labels from features, and normalize features on the fly.
# The features are every column except "label", selected by name so that the
# split matches the name-based label lookup and never leaks the label into the
# features if the column order changes. Dividing by 255 rescales the values
# (assumes 8-bit pixel intensities in 0..255 — TODO confirm against the CSVs).
x_train = train_df.drop(columns="label") / 255
y_train = train_df.loc[:, "label"]
x_test = test_df.drop(columns="label") / 255
y_test = test_df.loc[:, "label"]

In [None]:
# Show some randomly chosen images from the training set, each with its label above it.
sample_size = 30
plt.figure(figsize=(16, 6))
sample_positions = np.random.permutation(len(x_train))[:sample_size]
for count, i in enumerate(sample_positions, start=1):
    plt.subplot(1, sample_size, count)
    # Hide the axis ticks explicitly. The previous axhline('') / axvline('')
    # calls passed a string where a float coordinate is expected and drew
    # stray lines across each image.
    plt.xticks([])
    plt.yticks([])
    # `i` is a row position, so use positional access for the label as well,
    # matching x_train.iloc[i] regardless of the DataFrame index.
    plt.text(x=10, y=-10, s=str(y_train.iloc[i]), fontsize=18)
    # Each row is reshaped to a 28x28 image for display
    plt.imshow(x_train.iloc[i].values.reshape((28, 28)), cmap=plt.cm.Greys)
plt.show()

## Train model with MLflow

In [None]:
# Re-load the workspace from the saved configuration file
ws = Workspace.from_config()

# Name of the experiment the training run is logged under
experiment_name = "mnist-experiment-local"

# Point MLflow at the workspace's tracking URI, select the experiment,
# and turn on autologging before the model is fitted
tracking_uri = ws.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)
mlflow.autolog()

# Logistic regression classifier; C is set to the inverse of `reg`
reg = 0.5
clf = LogisticRegression(
    C=1.0 / reg,
    solver="liblinear",
    multi_class="auto",
    random_state=42,
)

# Fit inside an MLflow run; `run` is kept so its run id can be used afterwards
with mlflow.start_run() as run:
    features, labels = x_train.values, y_train.values
    clf.fit(features, labels)

## Register the model

In [None]:
# Build the URI of the "model" artifact under the training run and
# register it as "sklearn_mnist_model"
run_id = run.info.run_id
model_uri = f"runs:/{run_id}/model"
model = mlflow.register_model(model_uri, "sklearn_mnist_model")