### Azure Environment

In [None]:
import azureml
from azureml.core import Workspace, Run
from azureml.core import Experiment
from azureml.core.authentication import  InteractiveLoginAuthentication
# Interactive login used to authenticate against the workspace below;
# force=True and tenant_id=None are passed through unchanged.
cli_auth = InteractiveLoginAuthentication(
    force=True,
    tenant_id=None,
)

In [None]:
# Load the workspace described by the local config file, authenticating
# with the interactive login created earlier.
ws = Workspace.from_config(
    path='./aml_config/config.json',
    auth=cli_auth,
)

### Get or create compute target

In [None]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Choose a name and scale limits for the cluster. Each setting can be
# overridden through the corresponding environment variable.
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpuML")
# os.environ.get returns a string whenever the variable is set, while the
# defaults here are ints; convert explicitly so the node counts are always
# integers regardless of where they came from.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 2))

# This example uses CPU STANDARD_D2_V2 VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

if compute_name in ws.compute_targets:
    # Reuse the target that is already registered under this name.
    compute_target = ws.compute_targets[compute_name]
    if compute_target and isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes,
    )

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use the 'status' property
    print(compute_target.status.serialize())

### data storage

In [None]:
# Fetch the workspace's default datastore and print its type, account name
# and container name on one tab-separated line.
ds = ws.get_default_datastore()
print('\t'.join(map(str, (ds.datastore_type, ds.account_name, ds.container_name))))

### upload data

In [None]:
# Upload the local ./data directory to the 'data' path of the datastore,
# overwriting existing files and showing progress.
ds.upload(
    src_dir='./data',
    target_path='data',
    overwrite=True,
    show_progress=True,
)

### Experiments

In [None]:
# Experiment handle named 'FlightDelays' in the workspace; runs are
# submitted through it below.
exps = Experiment(
    workspace=ws,
    name='FlightDelays',
)

### Estimators

In [None]:
from azureml.train.estimator import Estimator

In [None]:
# Arguments handed to the entry script: the mounted datastore plus the
# model hyperparameters.
script_params = {
    '--data-folder': ds.as_mount(),
    '--max_depth': 4,
    '--random_state': 42,
    '--n_estimators': 100,
}

# Estimator that runs ./script/main.py on the compute target with the
# listed conda and pip packages.
ests = Estimator(
    source_directory='./script/',
    script_params=script_params,
    compute_target=compute_target,
    entry_script='main.py',
    conda_packages=['scikit-learn', 'pandas'],
    pip_packages=['joblib'],
)

### run experiments

In [None]:
# Submit the estimator configuration to the experiment. `run` is the handle
# the following cells use for monitoring, metrics and model registration.
run = exps.submit(config=ests)

In [None]:
# Bare expression: presumably evaluated as the cell's last statement so the
# notebook renders the run object.
run

##### If runs are already active

In [None]:
# Get the runs of this experiment; the result is consumed with next() in the
# following cell, so it is used as an iterator.
runs = exps.get_runs()

In [None]:
# Advance the iterator and show the next run.
# NOTE(review): the result is not assigned, so `run` is left unchanged —
# confirm whether `run = next(runs)` was intended for resuming an existing run.
next(runs)

### Show Run

In [None]:
from azureml.widgets import RunDetails
# Build the run-details widget for `run` and show it in the notebook.
RunDetails(run).show()

In [None]:
# Optional run controls, intentionally left commented out; uncomment the
# line that is needed.
#run.cancel()
#run.wait_for_completion(show_output=True)

### collect results (metrics)

In [None]:
# Retrieve the metrics of the run; the return value is not stored, so it is
# presumably meant to be shown as the cell output.
run.get_metrics()

#### Filenames

In [None]:
# Print the file names reported for the run (the model path registered in a
# later cell is expected to be among them — TODO confirm).
print(run.get_file_names())

## register models

In [None]:
# Register the run's 'outputs/RandomForest.joblib' file as the model named
# 'RandomForest'.
model = run.register_model(
    model_name='RandomForest',
    model_path='outputs/RandomForest.joblib',
)

# Show name, id and version of the registered model, tab-separated.
print('\t'.join(str(field) for field in (model.name, model.id, model.version)))