# Chapter 08 code snippets
This notebook contains all code snippets from chapter 8.

## Training a simple sklearn model within notebooks

In [None]:
from azureml.core import Workspace

# Connect to the workspace described by the local config file and fetch the
# registered 'diabetes' dataset.
ws = Workspace.from_config()
diabetes_ds = ws.datasets['diabetes']

# Split roughly 80% / 20% into training and validation data.
# A fixed seed makes the split (and every metric computed below) reproducible
# across kernel restarts.
training_data, validation_data = diabetes_ds.random_split(percentage=0.8, seed=42)

# Features are every column except 'target'; the label is the 'target' column.
X_train = training_data.drop_columns('target').to_pandas_dataframe()
y_train = training_data.keep_columns('target').to_pandas_dataframe()

X_validate = validation_data.drop_columns('target').to_pandas_dataframe()
y_validate = validation_data.keep_columns('target').to_pandas_dataframe()

In [None]:
from sklearn.linear_model import LassoLars

# Regularization strength passed to the LassoLars estimator
alpha = 0.1

# Build the estimator first, then fit it on the training split
model = LassoLars(alpha=alpha)
model.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import mean_squared_error

import numpy as np

predictions = model.predict(X_validate)

# Root mean squared error. The square root is taken explicitly because the
# `squared=False` flag is not available in recent scikit-learn releases.
# Arguments follow the documented (y_true, y_pred) order.
rmse = np.sqrt(mean_squared_error(y_validate, predictions))

# Range of data using the peak to peak numpy function.
# np.ptp() is used because the ndarray.ptp() method was removed in NumPy 2.0.
range_y_validate = np.ptp(y_validate.to_numpy())
# Normalize dividing by the range of the data
nrmse = rmse / range_y_validate

print(f"Normalized Root Mean Squared Error: {nrmse}")

In [None]:
import os
import joblib

# The file name encodes the metric and the alpha value, 4 decimals each
model_file_name = f'model_{nrmse:.4f}_{alpha:.4f}.pkl'
model_path = os.path.join('./outputs/', model_file_name)

# Make sure the target folder exists before serializing the model
os.makedirs('./outputs', exist_ok=True)
joblib.dump(value=model, filename=model_path)

## Tracking metrics in experiments

In [None]:
from azureml.core import Workspace, Experiment

# Workspace comes from the local config file; the experiment is looked up by name
ws = Workspace.from_config()
exp = Experiment(ws, "chapter08")

In [None]:
# Start an interactive run without a snapshot directory
run = exp.start_logging(
    snapshot_directory=None,
    display_name="Run in notebook",
)
run_details = run.get_details()
print(run_details)

In [None]:
# Log the same metric name twice; the second call also carries a description
run.log(name="nrmse", value=0.01)
run.log("nrmse", 0.015, description="2nd measure")

In [None]:
# Log a list of values under a single metric name
run.log_list(name="accuracies", value=[0.5, 0.57, 0.62])

In [None]:
# Log a two-row table in one call, then log one more row under the same name
run.log_table(name="table", value={"x":[1, 2], "y":[0.1, 0.2]})
run.log_row(name="table", x=3, y=0.3)

In [None]:
# Create a child of the current run and log a metric on it
child_run = run.child_run()
child_run.log(name="child_metric", value=0.01)

# Log a metric on the parent, reached through the child's `parent` attribute
parent_run = child_run.parent
parent_run.log(name="metric_from_child", value=0.02)

In [None]:
# Cancel the child run, complete the parent, and print the parent's status
child_run.cancel()
run.complete()
final_status = run.get_status()
print(final_status)

### Tracking model evolution

In [None]:
import shutil

# Remove the local outputs folder left by earlier cells.
# A missing folder is fine; any other error still propagates.
try:
    shutil.rmtree("./outputs")
except FileNotFoundError:
    pass

In [None]:
from sklearn.linear_model import LassoLars
from sklearn.metrics import mean_squared_error

def train_and_evaluate(alpha, X_t, y_t, X_v, y_v):
    """Train a LassoLars model and report its normalized RMSE.

    Args:
        alpha: Value passed to ``LassoLars(alpha=...)``.
        X_t: Features used to fit the model.
        y_t: Targets used to fit the model.
        X_v: Features used to compute predictions for evaluation.
        y_v: Targets the predictions are compared against; must provide
            ``to_numpy()`` (e.g. a pandas DataFrame or Series).

    Returns:
        A ``(model, nrmse)`` tuple: the fitted estimator and the RMSE divided
        by the range (max - min) of ``y_v``.
    """
    import numpy as np

    model = LassoLars(alpha=alpha)
    model.fit(X_t, y_t)
    predictions = model.predict(X_v)
    # Take the square root explicitly: the `squared=False` flag is not
    # available in recent scikit-learn releases. Arguments follow the
    # documented (y_true, y_pred) order.
    rmse = np.sqrt(mean_squared_error(y_v, predictions))
    # np.ptp() instead of ndarray.ptp(), which was removed in NumPy 2.0.
    range_y_validate = np.ptp(y_v.to_numpy())
    nrmse = rmse / range_y_validate
    print(f"NRMSE: {nrmse}")
    return model, nrmse

# Train with alpha=0.1 on the training split and evaluate on the validation split
trained_model, model_nrmse = train_and_evaluate(
    alpha=0.1,
    X_t=X_train,
    y_t=y_train,
    X_v=X_validate,
    y_v=y_validate,
)

In [None]:
def train_and_evaluate(run, alpha, X_t, y_t, X_v, y_v):
    """Train a LassoLars model and log its normalized RMSE to ``run``.

    Args:
        run: Object exposing ``log(name, value)`` and ``log_row(name, **kwargs)``;
            it receives the metric produced here.
        alpha: Value passed to ``LassoLars(alpha=...)``.
        X_t: Features used to fit the model.
        y_t: Targets used to fit the model.
        X_v: Features used to compute predictions for evaluation.
        y_v: Targets the predictions are compared against; must provide
            ``to_numpy()`` (e.g. a pandas DataFrame or Series).

    Returns:
        A ``(model, nrmse)`` tuple: the fitted estimator and the RMSE divided
        by the range (max - min) of ``y_v``.
    """
    import numpy as np

    model = LassoLars(alpha=alpha)
    model.fit(X_t, y_t)
    predictions = model.predict(X_v)
    # Take the square root explicitly: the `squared=False` flag is not
    # available in recent scikit-learn releases. Arguments follow the
    # documented (y_true, y_pred) order.
    rmse = np.sqrt(mean_squared_error(y_v, predictions))
    # np.ptp() instead of ndarray.ptp(), which was removed in NumPy 2.0.
    range_y_validate = np.ptp(y_v.to_numpy())
    nrmse = rmse / range_y_validate
    # Log the metric on its own and as a row pairing it with the alpha used.
    run.log("nrmse", nrmse)
    run.log_row("nrmse over α", α=alpha, nrmse=nrmse)
    return model, nrmse

In [None]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(ws, "chapter08")

# Alpha values to evaluate inside a single interactive run
alphas_to_try = [0.001, 0.01, 0.1, 0.25, 0.5]

with exp.start_logging(snapshot_directory=None) as run:
    print(run.get_portal_url())
    for candidate_alpha in alphas_to_try:
        train_and_evaluate(
            run, candidate_alpha, X_train, y_train, X_validate, y_validate
        )

### Using MLflow to track experiments

In [None]:
from sklearn.linear_model import LassoLars
from sklearn.metrics import mean_squared_error
import mlflow

def train_and_evaluate(alpha, X_t, y_t, X_v, y_v):
    """Train a LassoLars model and log its normalized RMSE through MLflow.

    Args:
        alpha: Value passed to ``LassoLars(alpha=...)``.
        X_t: Features used to fit the model.
        y_t: Targets used to fit the model.
        X_v: Features used to compute predictions for evaluation.
        y_v: Targets the predictions are compared against; must provide
            ``to_numpy()`` (e.g. a pandas DataFrame or Series).

    Returns:
        A ``(model, nrmse)`` tuple: the fitted estimator and the RMSE divided
        by the range (max - min) of ``y_v``.
    """
    import numpy as np

    model = LassoLars(alpha=alpha)
    model.fit(X_t, y_t)
    predictions = model.predict(X_v)
    # Take the square root explicitly: the `squared=False` flag is not
    # available in recent scikit-learn releases. Arguments follow the
    # documented (y_true, y_pred) order.
    rmse = np.sqrt(mean_squared_error(y_v, predictions))
    # np.ptp() instead of ndarray.ptp(), which was removed in NumPy 2.0.
    range_y_validate = np.ptp(y_v.to_numpy())
    nrmse = rmse / range_y_validate
    mlflow.log_metric("nrmse", nrmse)
    return model, nrmse

# Select the MLflow experiment, then train inside a run with sklearn autologging on
mlflow.set_experiment("chapter08-mlflow")
with mlflow.start_run():
    mlflow.sklearn.autolog()
    trained_model, model_nrmse = train_and_evaluate(
        alpha=0.1,
        X_t=X_train,
        y_t=y_train,
        X_v=X_validate,
        y_v=y_validate,
    )

You can open the MLflow UI by running the `mlflow ui` command in a terminal.
This starts a web server listening on port 5000 of your compute instance.
To reach that port, visit `https://<compute-instance-name>-5000.<location>.instances.azureml.ms/`.

If you have trouble constructing this URL:
- Open JupyterLab.
- Copy its URL, which should look like `https://<compute-instance-name>.<location>.instances.azureml.ms`.
- Modify the URL to include the port number 5000, as shown above.

In [None]:
!pip install azureml-mlflow

In [None]:
import mlflow
from azureml.core import Workspace

ws = Workspace.from_config()

# Point MLflow at the tracking URI provided by the workspace
tracking_uri = ws.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(tracking_uri)

mlflow.set_experiment("chapter08-mlflow")
with mlflow.start_run():
    mlflow.sklearn.autolog()
    trained_model, model_nrmse = train_and_evaluate(
        alpha=0.1,
        X_t=X_train,
        y_t=y_train,
        X_v=X_validate,
        y_v=y_validate,
    )

## Scaling the training process with compute clusters

In [None]:
from azureml.core import Workspace, Experiment
from azureml.core import ScriptRunConfig

ws = Workspace.from_config()
target = ws.compute_targets['cpu-sm-cluster']

# Describe the job: which script to run, with which arguments, on which compute
script = ScriptRunConfig(
    source_directory='greeter-job',
    script='greeter.py',
    arguments=['--greet-name', 'packt'],
    compute_target=target,
)

exp = Experiment(workspace=ws, name='greet-packt')
run = exp.submit(config=script)
portal_url = run.get_portal_url()
print(portal_url)
run.wait_for_completion(show_output=True)

### Exploring the outputs and logs of a run

In [None]:
from azureml.widgets import RunDetails

# Submit the same configuration again and show the run in the RunDetails widget
run = exp.submit(config=script)
run_widget = RunDetails(run)
run_widget.show()

In [None]:
# Retrieve the run's details together with its logs; as the last expression
# of the cell, the returned value is shown as the cell output.
run.get_details_with_logs()

### Understanding execution environments

In [None]:
from azureml.core import Workspace

# Load the workspace from the local configuration file; `ws` is used by the
# environment and job-submission cells that follow.
ws = Workspace.from_config()

In [None]:
from azureml.core import Environment

# Fetch the environment named "AzureML-Minimal" from the workspace
minimal_env = Environment.get(workspace=ws, name="AzureML-Minimal")

print(minimal_env.name, minimal_env.version)

# Print the environment's conda dependencies serialized as a string
conda_spec = minimal_env.python.conda_dependencies.serialize_to_string()
print(conda_spec)

In [None]:
from azureml.core import Experiment, ScriptRunConfig

target = ws.compute_targets['cpu-sm-cluster']

# Same greeter job as before, this time with an explicit environment
script = ScriptRunConfig(
    source_directory='greeter-job',
    script='greeter.py',
    arguments=['--greet-name', 'packt'],
    compute_target=target,
    environment=minimal_env,
)

exp = Experiment(workspace=ws, name='greet-packt')
run = exp.submit(config=script)
portal_url = run.get_portal_url()
print(portal_url)
run.wait_for_completion(show_output=True)

In [None]:
from azureml.core import Environment

# Build an environment from a conda specification file
banner_env = Environment.from_conda_specification(
    name="banner-env",
    file_path="greeter-banner-job.yml",
)
# Set an environment variable on the environment definition
banner_env.environment_variables["GREET_HEADER"] = "Env. var. header:"

In [None]:
# To register in the workspace, uncomment the following line
# banner_env.register(ws)

In [None]:
# Run the banner variant of the greeter with the custom environment
script = ScriptRunConfig(
    source_directory='greeter-banner-job',
    script='greeter.py',
    arguments=['--greet-name', 'packt'],
    compute_target=target,
    environment=banner_env,
)

exp = Experiment(workspace=ws, name='greet-packt')
run = exp.submit(config=script)
portal_url = run.get_portal_url()
print(portal_url)
run.wait_for_completion(show_output=True)

### Training the diabetes model on a compute cluster

In [None]:
!pip show scikit-learn

In [None]:
import sklearn
# Print the version of the scikit-learn package imported by this kernel
print(sklearn.__version__)

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies 
import sklearn

# Collect the dependencies first, then attach them to the new environment.
# scikit-learn is pinned to the version imported in this notebook.
conda_deps = CondaDependencies()
conda_deps.add_conda_package(f"scikit-learn=={sklearn.__version__}")
conda_deps.add_pip_package("azureml-dataset-runtime[pandas]")

diabetes_env = Environment(name="diabetes-training-env")
diabetes_env.python.conda_dependencies = conda_deps
# Optionally, pin joblib to the local version as well:
# import joblib
# diabetes_env.python.conda_dependencies.add_pip_package(f"joblib=={joblib.__version__}")
# Alternatively, the dependencies can be declared with a single call:
# diabetes_env.python.conda_dependencies = CondaDependencies.create(
#               conda_packages=[f"scikit-learn=={sklearn.__version__}"],
#               pip_packages=["azureml-dataset-runtime[pandas]", f"joblib=={joblib.__version__}"])

In [None]:
from azureml.core import Workspace, Experiment
from azureml.core import ScriptRunConfig

ws = Workspace.from_config()
target = ws.compute_targets['cpu-sm-cluster']

# Training job: runs training.py from the diabetes-training folder with --alpha 0.01
script = ScriptRunConfig(
    source_directory='diabetes-training',
    script='training.py',
    arguments=['--alpha', 0.01],
    compute_target=target,
    environment=diabetes_env,
)

exp = Experiment(workspace=ws, name='chapter08-diabetes')
run = exp.submit(config=script)
run.wait_for_completion(show_output=True)