In [None]:
# Sanity check: show which Azure ML SDK version this kernel has installed.
import azureml.core
print("SDK Version:", azureml.core.VERSION)

# ESML - accelerator: Quick 3 step DEMO
- 1) `AutoMap datalake` & init ESML project
- 2) `Train model & compare` scoring to existing active model in DEV, TEST, or PROD
- 3) `Deploy model & Test` AKS webservice




Uses `ESML accelerators` & factories to `code faster`, abstracting away the automated creation of `Azure ML Studio` artifacts (datasets/versioning/experiments), and adds `NEW` concepts.
- ESML enables `enterprise CONCEPTS` (Project/Model/Dev_Test_Prod) - able to scale across Azure subscriptions in DEV, TEST, PROD for a model.
- ESML includes `accelerators for data refinement, with CONCEPTS`: Bronze, Silver, Gold, able to `share refined data ACROSS projects` & models
- ESML includes `accelerators for ML CONCEPTS` such as `Split to TRAIN,VALIDATE, TEST` (X_test, y_test to auto-generate charting)
![](./images/split_gold_and_train_automl_small.png)

# 1) `ESML - Autolake-mapping` generates Azure ML Datasets + Feature engineering: `Bronze->Silver->Gold`

In [None]:
######  NB! InteractiveLoginAuthentication is only needed the 1st time this notebook is run.
######  Once ws_config has been written, use the later cell in this notebook that just reads that file.
# `sys` and `os` are imported here so this cell also works on a fresh kernel
# (Restart & Run All); previously they were first imported in a later cell.
import os
import sys

from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication

# Make the project-local `esml` module importable by extending sys.path.
# NOTE(review): the later init cell appends "./common/" instead of this path —
# confirm which one matches your checkout layout.
sys.path.append(os.path.abspath("../azure-enterprise-scale-ml/esml/common/"))  # NOQA: E402
from esml import ESMLDataset, ESMLProject

p = ESMLProject()
p.dev_test_prod = "dev"  # authenticate against the DEV environment

# Interactive login for the project's tenant, then write the workspace config
# so that subsequent runs can load it from file.
auth = InteractiveLoginAuthentication(tenant_id=p.tenant)
ws, config_name = p.authenticate_workspace_and_write_config(auth)
######  NB!

In [None]:
# Notebook switches, read by the project-init cell below.
# demo_mode: when True, ESMLProject is created with its demo/template settings.
# unregister_all_datasets: when True, p.unregister_all_datasets(ws) is called after init.
demo_mode = False
unregister_all_datasets = False

In [None]:
import sys, os
import pandas as pd 
from azureml.core import Workspace
sys.path.append(os.path.abspath("./common/"))  # NOQA: E402
from esml import ESMLDataset, ESMLProject

# 1) Create the ESML project, choosing which SETTINGS folder it reads.
if not demo_mode:
    # Searches in ROOT for your copied SETTINGS folder '../../../settings'
    # (copy the template settings from '../settings' to get started).
    p = ESMLProject()
else:
    # Demo settings: searches the internal TEMPLATE SETTINGS folder '../settings'.
    p = ESMLProject(True)
    print("DEMO mode - using internal TEMPLATE settings")

# 2) Load the DEV, TEST or PROD Azure ML Studio workspace from the written config.
ws = p.get_workspace_from_config()

# Optional clean-up, for DEMO purposes.
if unregister_all_datasets:
    p.unregister_all_datasets(ws)
    

In [None]:
# Show the ESML project's description (exact output is defined by ESMLProject.describe).
p.describe()

In [None]:
# 3) Automapping from datalake to Azure ML datasets.
# The return value is kept as `datastore`; it is not used again in this notebook.
datastore = p.init(ws)

In [None]:
# Feature engineering: Bronze -> Silver -> Gold, working with Azure ML Datasets
esml_dataset = p.DatasetByName("ds01_diabetes")  # look up the dataset by name

# Bronze -> Silver: load Bronze into pandas and save it as the Silver version.
df_bronze = esml_dataset.Bronze.to_pandas_dataframe()
p.save_silver(esml_dataset, df_bronze)

# Silver -> Gold: keep only rows whose AGE value is above 0.015, then save as Gold.
df = esml_dataset.Silver.to_pandas_dataframe()
age_mask = df["AGE"] > 0.015
df_filtered = df.loc[age_mask]
gold_train = p.save_gold(df_filtered)

## SUMMARY - step 1
- ESML has now `Automapped` and `Autoregistered` Azure ML Datasets as: `IN, BRONZE, SILVER, GOLD`
- ESML has read configuration for correct environment (DEV, TEST, PROD). 
    - Both small customers and large Enterprise customers often want DEV, TEST, PROD in `different Azure ML workspaces` (and different subscriptions)
- User has done feature engineering, and saved GOLD `p.save_gold`

# 2) `ESML` Train model in `5 codelines`

In [None]:
from esml import ESMLDataset, ESMLProject
from baselayer_azure_ml import AutoMLFactory,azure_metric_regression,azure_metric_classification
from azureml.train.automl import AutoMLConfig

# 1) Get the AutoML performance config for the active environment (dev, test or prod)
automl_performance_config = p.get_automl_performance_config()
# 2) Get the training compute for the active environment
aml_compute = p.get_training_aml_compute(ws)

label = "Y"
# 3) Split GOLD into train/validate/test; 0.6 is the split argument passed together with the label.
#    Auto-registered in AZURE (M03_GOLD_TRAIN | M03_GOLD_VALIDATE | M03_GOLD_TEST)
#    Alt: p.Gold.random_split(percentage=0.8, seed=23)
train_6, validate_set_2, test_set_2 = p.split_gold_3(0.6, label)

# 4) Override the ENV config for this model (it inherits from the enterprise DEV_TEST_PROD config baseline)
automl_config = AutoMLConfig(
    task='regression',
    primary_metric=azure_metric_regression.MAE,  # Note: Regression(MAPE) is not possible in AutoML
    compute_target=aml_compute,
    training_data=p.GoldTrain,  # is 'train_6' pandas dataframe, but as an Azure ML Dataset
    experiment_exit_score=0.208,  # DEMO purpose. Must be a float (was the string '0.208')
    label_column_name=label,
    **automl_performance_config
)

# 5) Train, either as an Azure ML pipeline or as a plain run
via_pipeline = False
if via_pipeline:
    best_run, fitted_model, experiment = AutoMLFactory(p).train_pipeline(automl_config)
else:
    best_run, fitted_model, experiment = AutoMLFactory(p).train_as_run(automl_config)

- ESML has now fetched `configuration & train compute` for enterprise `environment (DEV,TEST or PROD)`
- ESML has `autogenerated` an AutoML-experiment, optionally as a `pipeline`, in the correct environment.
- User has overridden some AutoML settings (`label, split percentage`, `target metric`), and used the `1-liner TRAIN` code snippet 

## 2b) ESML Scoring compare `1-codeline`: Promote model or not? If better, then `Register model`
- `IF` newly trained model in `current` environment (`DEV`, `TEST` or `PROD`) scores BETTER than existing model in `target` environment, then `new model` can be registered and promoted.

In [None]:
# Display the project's currently active environment setting (e.g. "dev").
p.dev_test_prod

In [None]:
from baselayer_azure_ml import AutoMLFactory
# Target environment ("dev", "test" or "prod") to compare against:
# does Model A score better than Model B?
target_env = p.dev_test_prod
print("Example: If new model scores better in DEV, we can promote this to TEST")

# One call compares the newly trained model with the existing model in target_env.
comparison = AutoMLFactory(p).compare_scoring_current_vs_new_model(target_env)
promote, m1_name, r1_id, m2_name, r2_run_id = comparison

print("Promote model?  {}".format(promote))
print("New Model: {} in environment {}".format(m1_name, p.dev_test_prod))
print("Existing Model: {} in environment {}".format(m2_name,target_env))

# Can only register a model in the same workspace (test->test);
# going from dev->test requires retraining.
if promote:
    if p.dev_test_prod == target_env:
        AutoMLFactory(p).register_active_model(target_env)


# 3) ESML `Deploy model ONLINE` in `2 lines of code` (AKS) 
- Deploy an "offline" MODEL from an old `run` to the target → `DEV`, `TEST` or `PROD` environment
- ESML saves `API_key in Azure keyvault automatically`
- ESML auto-config solves 4 common 'errors/things': `correct compute name` and `valid replicas, valid agents, valid auto scaling`
    - Tip: You can adjust the number of replicas, and different CPU/memory configuration, or using a different compute target.

In [None]:
# Display the active environment together with the loaded workspace's name before deploying.
p.dev_test_prod,ws.name

In [None]:
# 1) Fetch the inference config, model and best run for the active model (AutoML support).
inference_config, model, best_run = p.get_active_model_inference_config(ws)
# 2) Deploy the model to AKS; the call returns the service, its API URI and a keyvault secret reference.
# NOTE(review): `kv_aks_api_secret` presumably refers to the keyvault entry holding the API key — confirm.
service,api_uri, kv_aks_api_secret= p.deploy_automl_model_to_aks(model,inference_config)

# Howto: Inject your own deployment config
# own_deploy_config_to_inject = p.ComputeFactory.get_deploy_config(p.dev_test_prod,False, "02","01")  # 1) Create your config, or use config-baseline, then modify
# service,api_uri, kv_aks_api_secret= p.deploy_automl_model_to_aks(model,inference_config,own_deploy_config_to_inject) #2) pass as argument
#

## 3b) ESML Test AKS webservice, `2 lines of code`

In [None]:
# Fetch features, labels and tags; ESML knows the SPLIT and LABEL already (due to training).
# NOTE(review): the variables are named X_test/y_test but are loaded via
# get_gold_validate_Xy() — confirm that the validate split is the intended one here.
X_test, y_test, tags = p.get_gold_validate_Xy()
print(tags)

# Caller id sent with the request; the same id is used later as a filter in p.get_scored(...).
caller_user_id = '81965d9c-40ca-4e47-9723-5a608a32a0e4'
df = p.call_webservice(p.ws, X_test,caller_user_id) # Auto-fetch key from keyvault, and calls the webservice
df.head()

# Get Scoring from AutoLake - with filter
- `Filters`:  Date, caller_id, model_v_used_that_day

In [None]:
import datetime as dt

# Build today's date filter in the form YYYY_MM_DD.
# NOTE(review): this uses local time — confirm that matches how scored data is dated.
now = dt.datetime.now()
date_filter = now.strftime('%Y_%m_%d')

# FILTERS
print(date_filter)
caller_user_id = '81965d9c-40ca-4e47-9723-5a608a32a0e4'
model_v_used_when_scoring_that_day = "1"
ds_list1, df_all1 = p.get_scored(date_filter, model_v_used_when_scoring_that_day,caller_user_id)

print("Datasets found in filter", len(ds_list1))
# Guard the index: if the filter matches nothing, ds_list1[0] would raise IndexError.
if ds_list1:
    print("Example: How many rows in 1st dataset? ", ds_list1[0].to_pandas_dataframe().shape[0])
else:
    print("Example: no datasets matched the filter, so there is no 1st dataset to inspect")
print("All rows", df_all1.shape[0])

df_all1.head()

# END