In [None]:
import os
import azureml.core
from azureml.core import (
    Workspace,
    Experiment,
    Dataset,
    Datastore,
    ComputeTarget,
    Environment,
    ScriptRunConfig
)
from azureml.data import OutputFileDatasetConfig
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core import PublishedPipeline
from azureml.pipeline.core import PipelineData

# Show which azureml-core release is installed so a run can be matched to an SDK version.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

In [None]:
# Resolve the Azure ML workspace via Workspace.from_config() (no explicit
# path or auth arguments are given here) and echo it for confirmation.
workspace = Workspace.from_config()
print(f"workspace: {workspace}")

# Experiment handle under which the pipeline run is submitted further down.
exp = Experiment(name="gpu-fashion", workspace=workspace)

In [None]:
# Reuse the AmlCompute cluster called `cluster_name` when the workspace already
# has one; otherwise provision a new cluster capped at a single node.
cluster_name = "gpu-cluster"

# Fetch the workspace's compute targets once and look the name up a single time.
cts = workspace.compute_targets
existing_target = cts[cluster_name] if cluster_name in cts else None

if existing_target is None:
    print("Creating a new compute target...")
    compute_config = AmlCompute.provisioning_configuration(
        # NOTE(review): confirm this VM size is still offered (and has quota)
        # in the workspace's region before relying on it.
        vm_size="Standard_NV6",
        max_nodes=1,
    )
    compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)
    # Block until provisioning finishes (or the 10-minute timeout elapses),
    # streaming progress into the cell output.
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=10
    )
elif existing_target.type == "AmlCompute":
    print("Found existing compute target.")
    compute_target = existing_target
else:
    # The name is taken by a different kind of compute. Attempting to create an
    # AmlCompute cluster under the same name cannot give a usable cluster, so
    # stop here with an actionable message instead of falling through to create.
    raise ValueError(
        f"Compute target '{cluster_name}' already exists in the workspace but has "
        f"type '{existing_target.type}', not 'AmlCompute'. Choose a different "
        "cluster_name or remove the conflicting compute target."
    )

In [None]:
# Run configuration handed to the pipeline step: a Docker-enabled environment
# built from a CUDA/cuDNN base image plus a few pip packages.
run_config = RunConfiguration()

# NOTE(review): some azureml-core releases mark `environment.docker.enabled`
# as deprecated in favour of a DockerConfiguration object - confirm against
# the installed SDK version before changing this.
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = (
    "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.3-cudnn8-ubuntu20.04"
)

# Pip requirements for the step, registered in the order listed.
conda = CondaDependencies()
for pip_package in (
    "azureml-sdk[automl]",
    "opencv-python-headless",
    "tensorflow-gpu==2.11.0",
):
    conda.add_pip_package(pip_package)

run_config.environment.python.conda_dependencies = conda

In [None]:
from azureml.pipeline.core import PipelineParameter
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference
from azureml.core import Dataset

# Directory passed as the step's source_directory; gpu.py is the script it runs.
script_folder = "./gpu-python"

# Single pipeline step that runs gpu.py on the compute target with the run
# configuration prepared earlier. allow_reuse=False is set on the step so the
# step opts out of result reuse.
training = PythonScriptStep(
    name="GPU Step",
    source_directory=script_folder,
    script_name="gpu.py",
    runconfig=run_config,
    compute_target=compute_target,
    allow_reuse=False,
)

# One-step pipeline bound to the workspace.
pipeline = Pipeline(workspace=workspace, steps=[training])
print("Pipeline is built")

# Kept as the last expression so the notebook displays whatever validate() returns.
pipeline.validate()

In [None]:
# Submit the pipeline under the experiment and keep the returned run handle.
# regenerate_outputs=True is forwarded with the submission; presumably it
# forces step outputs to be recomputed - confirm against the SDK docs.
run = exp.submit(
    pipeline,
    regenerate_outputs=True,
)