In [1]:
import azureml.core

print("Azure SDK version:", azureml.core.VERSION)

Azure SDK version: 1.0.76


In [2]:
from azureml.core import Workspace

ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

cesardl-automl-northcentralus-ws
automlpmdemo
northcentralus
102a16c3-37d3-48a8-9237-4c9b1e8e80e0


In [3]:
from azureml.core import Environment

envs = Environment.list(workspace=ws)
        
# Use curated environment for Spark
spark_curated_environment = Environment.get(workspace=ws, name="AzureML-PySpark-MmlSpark-0.15")

# Copy based on curated environment
spark_env = spark_curated_environment
spark_env.name = "Custom-AzureML-PySpark-Environment"


In [4]:
# Update Environment with newer Docker Spark Image
from azureml.core import ContainerRegistry

# Set base Docker Image
spark_env.docker.enabled = True

# Specify custom Docker base image and registry, if you don't want to use the defaults
spark_env.docker.base_image="mcr.microsoft.com/mmlspark/release" 
container_registry = ContainerRegistry()
container_registry.address = "mcr.microsoft.com"
# container_registry.username = ""   # Use username if using a private Docker Registry like ACR
# container_registry.password = ""   # Use password if using a private Docker Registry like ACR
spark_env.docker.base_image_registry=container_registry

spark_env.save_to_directory(path="./spark_environment_definition", overwrite=True)

In [5]:
from azureml.core import Experiment
experiment_name = 'spark-experiment-on-aml-compute'
experiment = Experiment(workspace=ws, name=experiment_name)

In [6]:
compute = ws.compute_targets["cpu-cluster"]

In [7]:
# Create project directory and copy the training script into the project directory
import os
import shutil

project_folder = './project-submit-folder'
os.makedirs(project_folder, exist_ok=True)

# Copy the needed files
shutil.copy('spark-job.py', project_folder)
shutil.copy('spark-job-simple.py', project_folder)
shutil.copy('iris.csv', project_folder)

'./project-submit-folder/iris.csv'

In [8]:
from azureml.core import ScriptRunConfig, RunConfiguration, Experiment
from azureml.core.conda_dependencies import CondaDependencies

## use pyspark framework
spark_run_config = RunConfiguration(framework="pyspark")
spark_run_config.environment = spark_env
spark_run_config.target = compute

scriptconfig = ScriptRunConfig(source_directory="project-submit-folder", 
                               script="spark-job.py",
                               run_config = spark_run_config)

In [9]:
run = experiment.submit(scriptconfig)
run

Experiment,Id,Type,Status,Details Page,Docs Page
spark-experiment-on-aml-compute,spark-experiment-on-aml-compute_1580154909_5540b098,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [10]:
# Monitor run

from azureml.widgets import RunDetails
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'NOTSET',…

In [11]:
# See files associated with the 'Best run'
print(run.get_file_names())

[]


In [70]:
# get all metris logged in the run
metrics = run.get_metrics()
print(metrics)

{'Regularization Rate': 0.01, 'Accuracy': 0.9787234042553191}
