From 8a462a3f98a6dfc52e93628767eba0c8f7d7dc21 Mon Sep 17 00:00:00 2001 From: kdestin <101366538+kdestin@users.noreply.github.com> Date: Fri, 5 May 2023 14:29:38 -0400 Subject: [PATCH] Remove `kubernetes-compute-*` workflows that aren't testing a customer facing sample (#2252) * Remove kubernetes-compute* workflows that don't appear to have a customer facing example * Remove .github/kubernetes-compute --- .github/kubernetes-compute/convert.py | 107 --- .../papermill_count_failed_case.py | 48 -- .../sdk_testcase_collector.py | 42 - .github/kubernetes-compute/tool.sh | 793 ------------------ ...bernetes-compute-cpu-resources-cleanup.yml | 124 --- ...kubernetes-compute-cpu-resources-setup.yml | 127 --- ...bernetes-compute-gpu-resources-cleanup.yml | 124 --- ...kubernetes-compute-gpu-resources-setup.yml | 140 ---- .../kubernetes-compute-sdk-endpoints.yml | 191 ----- ...ompute-sdk-jobs-automl-standalone-jobs.yml | 190 ----- .../kubernetes-compute-sdk-jobs-pipeline.yml | 192 ----- ...ubernetes-compute-sdk-jobs-single-step.yml | 189 ----- ...-training-cli-jobs-automl-dynamic-data.yml | 119 --- ...netes-compute-training-cli-jobs-automl.yml | 94 --- ...es-compute-training-cli-jobs-bad-cases.yml | 85 -- ...netes-compute-training-cli-jobs-basics.yml | 94 --- ...es-compute-training-cli-jobs-pipelines.yml | 93 -- ...te-training-cli-jobs-single-step-spark.yml | 94 --- ...-compute-training-cli-jobs-single-step.yml | 93 -- .../kubernetes-compute-workspace-setup.yml | 262 ------ 20 files changed, 3201 deletions(-) delete mode 100644 .github/kubernetes-compute/convert.py delete mode 100644 .github/kubernetes-compute/papermill_count_failed_case.py delete mode 100644 .github/kubernetes-compute/sdk_testcase_collector.py delete mode 100644 .github/kubernetes-compute/tool.sh delete mode 100644 .github/workflows/kubernetes-compute-cpu-resources-cleanup.yml delete mode 100644 .github/workflows/kubernetes-compute-cpu-resources-setup.yml delete mode 100644 .github/workflows/kubernetes-compute-gpu-resources-cleanup.yml delete mode 100644 .github/workflows/kubernetes-compute-gpu-resources-setup.yml delete mode 100644 .github/workflows/kubernetes-compute-sdk-endpoints.yml delete mode 100644 .github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml delete mode 100644 .github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml delete mode 100644 .github/workflows/kubernetes-compute-sdk-jobs-single-step.yml delete mode 100644 .github/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml delete mode 100644 .github/workflows/kubernetes-compute-training-cli-jobs-automl.yml delete mode 100644 .github/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml delete mode 100644 .github/workflows/kubernetes-compute-training-cli-jobs-basics.yml delete mode 100644 .github/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml delete mode 100644 .github/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml delete mode 100644 .github/workflows/kubernetes-compute-training-cli-jobs-single-step.yml delete mode 100644 .github/workflows/kubernetes-compute-workspace-setup.yml diff --git a/.github/kubernetes-compute/convert.py b/.github/kubernetes-compute/convert.py deleted file mode 100644 index e9cd6dd24c..0000000000 --- a/.github/kubernetes-compute/convert.py +++ /dev/null @@ -1,107 +0,0 @@ -import argparse -import yaml -import os - - -def convert(input_file, compute_target, instance_type, common_runtime, output_file): - def _convert(input_file, data, job_schema): - # check job type - is_pipeline_job = False - is_sweep_job = False - if "pipelineJob" in job_schema or "jobs" in data: - is_pipeline_job = True - if "sweepJob" in job_schema or data.get("type") == "sweep": - is_sweep_job = True - - print("Job type: pipelineJob", is_pipeline_job, "sweepJob:", is_sweep_job) - - # change compute target - if compute_target: - data["compute"] = "azureml:%s" % compute_target - if is_pipeline_job: - settings = data.get("settings", {}) - settings["default_compute"] = "azureml:%s" % compute_target - data["settings"] = settings - - # set instance type - if not is_pipeline_job and instance_type: - resources = data.get("resources", {}) - resources["instance_type"] = instance_type - data["resources"] = resources - - for field in ["trial", "component"]: - if field not in data: - continue - - file_field = data[field] - if not isinstance(file_field, str): - continue - - if file_field.startswith("file:"): - file_field = file_field.split(":", 1)[1] - - print("Found sub job spec:", file_field) - dirname = os.path.dirname(input_file) - convert( - os.path.join(dirname, file_field), - compute_target, - instance_type, - common_runtime, - "", - ) - - if is_pipeline_job: - jobs = data.get("jobs", {}) - for step in jobs: - print("Found step:", step) - _convert(input_file, jobs[step], "") - return - - print("Processing file:", input_file) - if not os.path.exists(input_file): - print("Warning: File doesn't exist: ", input_file) - return - with open(input_file, "r") as f: - data = yaml.load(f, Loader=yaml.FullLoader) - job_schema = data.get("$schema", "") - _convert(input_file, data, job_schema) - - # write to output file if output file is specified, otherwise change inplace. - if output_file: - with open(output_file, "w") as f: - yaml.dump(data, f) - else: - with open(input_file, "w") as f: - yaml.dump(data, f) - - -if __name__ == "__main__": - # Parse command line arguments - parser = argparse.ArgumentParser( - description="Convert test case to AMLARC-compatible files." - ) - parser.add_argument("-i", "--input", required=True, help="Input test case file") - parser.add_argument( - "-o", - "--output", - required=False, - help="Output AMLARC-compatible file, if not provides, " "replace file inplace", - ) - parser.add_argument("-c", "--compute-target", required=False, help="Compute target") - parser.add_argument("-it", "--instance-type", required=False, help="Instance type") - parser.add_argument( - "-cr", - "--common-runtime", - required=False, - default=False, - action="store_true", - help='Enable common runtime explicitly, default is "false"', - ) - args = parser.parse_args() - convert( - args.input, - args.compute_target, - args.instance_type, - args.common_runtime, - args.output, - ) diff --git a/.github/kubernetes-compute/papermill_count_failed_case.py b/.github/kubernetes-compute/papermill_count_failed_case.py deleted file mode 100644 index 2e1f4f263b..0000000000 --- a/.github/kubernetes-compute/papermill_count_failed_case.py +++ /dev/null @@ -1,48 +0,0 @@ -import argparse -import json -import os - - -def check_test_case(input_file): - error_count = 0 - error_list = [] - not_run_list = [] - - with open(input_file) as f: - files = f.readlines() - for file in files: - file = file.replace("\n", "") - if ".ipynb" in file: - file = file.replace(".ipynb", ".output.ipynb") - if not os.path.isfile(file): - not_run_list.append(file) - continue - with open(file) as output_file: - output_file_obj = json.load(output_file) - if ( - "An Exception was encountered at" - in output_file_obj["cells"][0]["source"][0] - ): - error_count += 1 - error_list.append(file) - - if error_count != 0: - for err in error_list: - print(err) - - if len(not_run_list) > 0: - print("\nThese test case are skipped") - for not_run in not_run_list: - print(not_run) - - raise Exception("Error occurs in these test cases") - - -if __name__ == "__main__": - # Parse command line arguments - parser = argparse.ArgumentParser(description="Check all papermill output files.") - parser.add_argument("-i", "--input", required=True, help="job ipynb file list") - - args = parser.parse_args() - - check_test_case(args.input) diff --git a/.github/kubernetes-compute/sdk_testcase_collector.py b/.github/kubernetes-compute/sdk_testcase_collector.py deleted file mode 100644 index c077c9055a..0000000000 --- a/.github/kubernetes-compute/sdk_testcase_collector.py +++ /dev/null @@ -1,42 +0,0 @@ -import argparse -import pathlib -import yaml -import re - - -def collect_test_cases(output_file, regex): - root_dir = ".github/workflows" - root = pathlib.Path(root_dir) - - testcases = [] - for item in root.iterdir(): - testcase_filename = str(item).split("/")[-1] - # print(testcase_filename) - if re.match(regex, testcase_filename) is not None: - print(testcase_filename) - # testcases.append(testcase_filename) - yaml_stream = open(item) - yaml_obj = yaml.load(yaml_stream, Loader=yaml.Loader) - for step in yaml_obj["jobs"]["build"]["steps"]: - if ".ipynb" in step["name"]: - work_dir = step["working-directory"] - notebook_name = step["name"].split("/")[-1] - testcases.append(f"{work_dir}/{notebook_name}\n") - - with open(output_file, "w") as f: - f.writelines(testcases) - - -if __name__ == "__main__": - # Parse command line arguments - parser = argparse.ArgumentParser( - description="Collect all sdk test case with a regex." - ) - parser.add_argument("-r", "--regex", required=True, help="test case name selector") - parser.add_argument( - "-o", "--output", required=False, help="the file selected test case send to" - ) - - args = parser.parse_args() - - collect_test_cases(args.output, args.regex) diff --git a/.github/kubernetes-compute/tool.sh b/.github/kubernetes-compute/tool.sh deleted file mode 100644 index 86b9ec885b..0000000000 --- a/.github/kubernetes-compute/tool.sh +++ /dev/null @@ -1,793 +0,0 @@ -## This script provides functions to facilitate cluster setup and job testing on Arc Enabled ML compute -set -x - -# Global variables -export SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) -export LOCK_FILE=${SCRIPT_DIR}/"$(basename ${BASH_SOURCE[0]})".lock -export RESULT_FILE=${SCRIPT_DIR}/kubernetes-compute-test-result.txt -export MAX_RETRIES=60 -export SLEEP_SECONDS=20 - -# Resource group -export SUBSCRIPTION="${SUBSCRIPTION:-subscription}" -export RESOURCE_GROUP="${RESOURCE_GROUP:-amlarc-examples-rg}" -export LOCATION="${LOCATION:-eastus}" - -# AKS -export AKS_CLUSTER_PREFIX="${AKS_CLUSTER_PREFIX:-amlarc-aks}" -export VM_SKU="${VM_SKU:-Standard_D4s_v3}" -export MIN_COUNT="${MIN_COUNT:-3}" -export MAX_COUNT="${MAX_COUNT:-8}" -export AKS_CLUSTER_NAME=${AKS_CLUSTER_NAME:-$(echo ${AKS_CLUSTER_PREFIX}-${VM_SKU} | tr -d '_')} -export AKS_LOCATION="${AKS_LOCATION:-$LOCATION}" -export AKS_RESOURCE_ID="/subscriptions/$SUBSCRIPTION/resourceGroups/$RESOURCE_GROUP/providers/Microsoft.ContainerService/managedClusters/$AKS_CLUSTER_NAME" - -# ARC -export ARC_CLUSTER_PREFIX="${ARC_CLUSTER_PREFIX:-amlarc-arc}" -export ARC_CLUSTER_NAME=${ARC_CLUSTER_NAME:-$(echo ${ARC_CLUSTER_PREFIX}-${VM_SKU} | tr -d '_')} -export ARC_LOCATION="${ARC_LOCATION:-$LOCATION}" -export ARC_RESOURCE_ID="/subscriptions/$SUBSCRIPTION/resourceGroups/$RESOURCE_GROUP/providers/Microsoft.Kubernetes/ConnectedClusters/$ARC_CLUSTER_NAME" - -# Extension -export RELEASE_TRAIN="${RELEASE_TRAIN:-staging}" -export RELEASE_NAMESPACE="${RELEASE_NAMESPACE:-azureml}" -export EXTENSION_NAME="${EXTENSION_NAME:-amlarc-extension}" -export EXTENSION_TYPE="${EXTENSION_TYPE:-Microsoft.AzureML.Kubernetes}" -export EXTENSION_SETTINGS="${EXTENSION_SETTINGS:-enableTraining=True enableInference=True allowInsecureConnections=True inferenceRouterServiceType=loadBalancer}" -export CLUSTER_TYPE="${CLUSTER_TYPE:-connectedClusters}" # or managedClusters -if [ "${CLUSTER_TYPE}" == "connectedClusters" ]; then - export CLUSTER_NAME=${CLUSTER_NAME:-$ARC_CLUSTER_NAME} - export RESOURCE_ID=${RESOURCE_ID:-$ARC_RESOURCE_ID} -else - # managedClusters - export CLUSTER_NAME=${CLUSTER_NAME:-$AKS_CLUSTER_NAME} - export RESOURCE_ID=${RESOURCE_ID:-$AKS_RESOURCE_ID} -fi - -# Workspace and Compute -export WORKSPACE="${WORKSPACE:-amlarc-githubtest-ws}" # $((1 + $RANDOM % 100)) -export COMPUTE="${COMPUTE:-githubtest}" -export INSTANCE_TYPE_NAME="${INSTANCE_TYPE_NAME:-defaultinstancetype}" -export CPU="${CPU:-1}" -export MEMORY="${MEMORY:-4Gi}" -export GPU="${GPU:-null}" - -refresh_lock_file(){ - rm -f $LOCK_FILE - echo $(date) > $LOCK_FILE -} - -remove_lock_file(){ - rm -f $LOCK_FILE -} - -check_lock_file(){ - if [ -f $LOCK_FILE ]; then - echo true - return 0 - else - echo false - return 1 - fi -} - -set_default_env(){ - echo "SUBSCRIPTION=6560575d-fa06-4e7d-95fb-f962e74efd7a" | tee -a $GITHUB_ENV - echo "RESOURCE_GROUP=azureml-examples" | tee -a $GITHUB_ENV - echo "WORKSPACE=amlarc-githubtest-ws" | tee -a $GITHUB_ENV - echo "LOCATION=eastus" | tee -a $GITHUB_ENV - echo "FILE_TICKET=true" | tee -a $GITHUB_ENV - echo "KEY_VAULT_NAME=amlarcgithubworkflowkv" | tee -a $GITHUB_ENV - echo "REPOSITORY=https://github.com/Azure/azureml-examples" | tee -a $GITHUB_ENV -} - -install_tools(){ - - az upgrade --all --yes - az extension add -n connectedk8s --yes - az extension add -n k8s-extension --yes - az extension add -n ml --yes - - curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl \ - && chmod +x ./kubectl \ - && sudo mv ./kubectl /usr/local/bin/kubectl - - pip install azureml-core - pip install shyaml - - pip list || true - az version || true -} - -register_provider(){ - - # For aks - az provider register --namespace Microsoft.ContainerService - - # For arc - az provider register -n 'Microsoft.Kubernetes' - - # For amlarc extension - az provider register --namespace Microsoft.Relay - az provider register --namespace Microsoft.KubernetesConfiguration - az provider register --namespace Microsoft.ContainerService - az feature register --namespace Microsoft.ContainerService -n AKS-ExtensionManager - - # For workspace - az provider register --namespace Microsoft.Storage - -} - -# setup RG -setup_resource_group(){ - # create resource group - az group show \ - --subscription $SUBSCRIPTION \ - -n "$RESOURCE_GROUP" || \ - az group create \ - --subscription $SUBSCRIPTION \ - -l "$LOCATION" \ - -n "$RESOURCE_GROUP" -} - -# setup AKS -setup_aks(){ - # create aks cluster - az aks show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $AKS_CLUSTER_NAME || \ - az aks create \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --location $AKS_LOCATION \ - --name $AKS_CLUSTER_NAME \ - --enable-cluster-autoscaler \ - --node-count $MIN_COUNT \ - --min-count $MIN_COUNT \ - --max-count $MAX_COUNT \ - --node-vm-size ${VM_SKU} \ - --no-ssh-key \ - $@ - - check_aks_status - -} - -check_aks_status(){ - for i in $(seq 1 $MAX_RETRIES); do - provisioningState=$(az aks show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $AKS_CLUSTER_NAME \ - --query provisioningState -o tsv) - echo "provisioningState: $provisioningState" - if [[ $provisioningState != "Succeeded" ]]; then - sleep ${SLEEP_SECONDS} - else - break - fi - done - - [[ $provisioningState == "Succeeded" ]] -} - -get_kubeconfig(){ - az aks get-credentials \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $AKS_CLUSTER_NAME \ - --overwrite-existing -} - -# connect cluster to ARC -connect_arc(){ - # get aks kubeconfig - get_kubeconfig - - # attach cluster to Arc - az connectedk8s show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $ARC_CLUSTER_NAME || \ - az connectedk8s connect \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --location $ARC_LOCATION \ - --name $ARC_CLUSTER_NAME --no-wait \ - $@ - - check_arc_status -} - -check_arc_status(){ - for i in $(seq 1 $MAX_RETRIES); do - connectivityStatus=$(az connectedk8s show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $ARC_CLUSTER_NAME \ - --query connectivityStatus -o tsv) - echo "connectivityStatus: $connectivityStatus" - if [[ $connectivityStatus != "Connected" ]]; then - sleep ${SLEEP_SECONDS} - else - break - fi - done - - [[ $connectivityStatus == "Connected" ]] -} - -# install extension -install_extension(){ - REINSTALL_EXTENSION="${REINSTALL_EXTENSION:-true}" - - if [[ $REINSTALL_EXTENSION == "true" ]]; then - # remove extension if exists to avoid missing the major version upgrade. - az k8s-extension delete \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME \ - --yes || true - - # install extension - az k8s-extension create \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME \ - --extension-type $EXTENSION_TYPE \ - --scope cluster \ - --release-train $RELEASE_TRAIN \ - --configuration-settings $EXTENSION_SETTINGS \ - --no-wait \ - $@ - else - az k8s-extension show \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME || \ - az k8s-extension create \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME \ - --extension-type $EXTENSION_TYPE \ - --scope cluster \ - --release-train $RELEASE_TRAIN \ - --configuration-settings $EXTENSION_SETTINGS \ - --no-wait \ - $@ - fi - - check_extension_status -} - -check_extension_status(){ - for i in $(seq 1 $MAX_RETRIES); do - provisioningState=$(az k8s-extension show \ - --cluster-name $CLUSTER_NAME \ - --cluster-type $CLUSTER_TYPE \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $EXTENSION_NAME \ - --query provisioningState -o tsv) - echo "provisioningState: $provisioningState" - if [[ $provisioningState != "Succeeded" ]]; then - sleep ${SLEEP_SECONDS} - else - break - fi - done - - [[ $provisioningState == "Succeeded" ]] -} - -# setup workspace -setup_workspace(){ - - az ml workspace show \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $WORKSPACE || \ - az ml workspace create \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $WORKSPACE \ - $@ - - az ml workspace update \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --name $WORKSPACE \ - --public-network-access Enabled - -} - -# setup compute -setup_compute(){ - - COMPUTE_NS=${COMPUTE_NS:-default} - - az ml compute attach \ - --subscription $SUBSCRIPTION \ - --resource-group $RESOURCE_GROUP \ - --workspace-name $WORKSPACE \ - --type Kubernetes \ - --resource-id "$RESOURCE_ID" \ - --namespace "$COMPUTE_NS" \ - --name $COMPUTE \ - $@ - -} - -setup_instance_type(){ - INSTANCE_TYPE_NAME="${1:-$INSTANCE_TYPE_NAME}" - CPU="${2:-$CPU}" - MEMORY="${3:-$MEMORY}" - GPU="${4:-$GPU}" - - cat < .azureml/config.json -{ - "subscription_id": "$SUBSCRIPTION", - "resource_group": "$RESOURCE_GROUP", - "workspace_name": "$WORKSPACE" -} -EOF -} - -install_jupyter_dependency(){ - pip install jupyter - pip install notebook - ipython kernel install --name "amlarc" --user - pip install matplotlib numpy scikit-learn==0.22.1 numpy joblib glob2 - pip install azureml.core - pip install azure.cli.core - pip install azureml.opendatasets - pip install azureml.widgets - pip list || true -} - -# run jupyter test -run_jupyter_test(){ - JOB_SPEC="${1:-examples/training/simple-train-sdk/img-classification-training.ipynb}" - JOB_DIR=$(dirname $JOB_SPEC) - JOB_FILE=$(basename $JOB_SPEC) - - echo "[JobSubmission] $JOB_SPEC" | tee -a $RESULT_FILE - - cd $JOB_DIR - jupyter nbconvert --debug --execute $JOB_FILE --to python - status=$? - cd - - - echo $status - if [[ "$status" == "0" ]] - then - echo "[JobStatus] $JOB_SPEC ${JOB_STATUS_COMPLETED}" | tee -a $RESULT_FILE - else - echo "[JobStatus] $JOB_SPEC ${JOB_STATUS_FAILED}" | tee -a $RESULT_FILE - return 1 - fi -} - -# run python test -run_py_test(){ - JOB_SPEC="${1:-python-sdk/workflows/train/fastai/mnist/job.py}" - JOB_DIR=$(dirname $JOB_SPEC) - JOB_FILE=$(basename $JOB_SPEC) - - echo "[JobSubmission] $JOB_SPEC" | tee -a $RESULT_FILE - - cd $JOB_DIR - python $JOB_FILE - status=$? - cd - - - echo $status - if [[ "$status" == "0" ]] - then - echo "[JobStatus] $JOB_SPEC ${JOB_STATUS_COMPLETED}" | tee -a $RESULT_FILE - else - echo "[JobStatus] $JOB_SPEC ${JOB_STATUS_FAILED}" | tee -a $RESULT_FILE - return 1 - fi -} - -# count result -count_result(){ - - MIN_SUCCESS_NUM=${MIN_SUCCESS_NUM:--1} - - [ ! -f $RESULT_FILE ] && touch $RESULT_FILE - - echo "RESULT:" - cat $RESULT_FILE - - total=$(grep -c "\[JobSubmission\]" $RESULT_FILE) - success=$(grep "\[JobStatus\]" $RESULT_FILE | grep -ic ${JOB_STATUS_COMPLETED}) - unhealthy=$(( $total - $success )) - - echo "Total: ${total}, Success: ${success}, Unhealthy: ${unhealthy}, MinSuccessNum: ${MIN_SUCCESS_NUM}." - - if (( 10#${unhealthy} > 0 )) ; then - echo "There are $unhealthy unhealthy jobs." - echo "Unhealthy jobs:" - grep "\[JobStatus\]" $RESULT_FILE | grep -iv ${JOB_STATUS_COMPLETED} - return 1 - fi - - if (( 10#${MIN_SUCCESS_NUM} > 10#${success} )) ; then - echo "There should be at least ${MIN_SUCCESS_NUM} success jobs. Found ${success} success jobs." - return 1 - fi - - echo "All tests passed." -} - - -######################################## -## -## Upload metrics funcs -## -######################################## -export CERT_PATH=$(pwd)/certs -export CONTAINER_NAME=amltestmdmcontinaer -export STATSD_PORT=38125 -export REPOSITORY="${REPOSITORY:-Repository}" -export WORKFLOW="${WORKFLOW:-Workflow}" -export REPEAT="${REPEAT:-5}" - -report_metrics(){ - # download metrics dependency - install_mdm_dependency - - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - - # start mdm - bash .github/kubernetes-compute/tool.sh start_mdm_container - - # upload metrics - $@ - - # stop mdm - stop_mdm_container - - rm -f metric_endpoint.txt - rm -f mdm_account.txt - rm -f mdm_namespace.txt - rm -f $CERT_PATH/key.pem - rm -f $CERT_PATH/cert.pem -} - -install_mdm_dependency(){ - sudo apt install socat -} - -download_metrics_info(){ - KEY_VAULT_NAME=${KEY_VAULT_NAME:-kvname} - METRIC_ENDPOINT_NAME=${METRIC_ENDPOINT_NAME:-METRIC-ENDPOINT} - MDM_ACCOUNT_NAME=${MDM_ACCOUNT_NAME:-MDM-ACCOUNT} - MDM_NAMESPACE_NAME=${MDM_NAMESPACE_NAME:-MDM-NAMESPACE} - KEY_PEM_NAME=${KEY_PEM_NAME:-KEY-PEM} - CERT_PEM_NAME=${CERT_PEM_NAME:-CERT-PEM} - - mkdir -p $CERT_PATH - - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $METRIC_ENDPOINT_NAME -f metric_endpoint.txt - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $MDM_ACCOUNT_NAME -f mdm_account.txt - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $MDM_NAMESPACE_NAME -f mdm_namespace.txt - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $KEY_PEM_NAME -f $CERT_PATH/key.pem - az keyvault secret download --vault-name $KEY_VAULT_NAME --name $CERT_PEM_NAME -f $CERT_PATH/cert.pem -} - -start_mdm_container(){ - - METRIC_ENDPOINT="${METRIC_ENDPOINT:-$(cat metric_endpoint.txt)}" - MDM_ACCOUNT="${MDM_ACCOUNT:-$(cat mdm_account.txt )}" - MDM_NAMESPACE="${MDM_NAMESPACE:-$(cat mdm_namespace.txt)}" - - METRIC_ENDPOINT_ARG="-e METRIC_ENDPOINT=${METRIC_ENDPOINT}" - if [ "$METRIC_ENDPOINT" = "METRIC-ENDPOINT-PROD" ]; then - METRIC_ENDPOINT_ARG="" - fi - - docker run -d \ - --name=$CONTAINER_NAME \ - -v ${CERT_PATH}:/certs \ - --net=host --uts=host \ - -e MDM_ACCOUNT=${MDM_ACCOUNT} \ - -e MDM_NAMESPACE=${MDM_NAMESPACE} \ - -e MDM_INPUT=statsd_udp \ - -e STATSD_PORT=${STATSD_PORT} \ - -e MDM_LOG_LEVEL=Debug \ - -e CERT_FILE=/certs/cert.pem \ - -e KEY_FILE=/certs/key.pem \ - linuxgeneva-microsoft.azurecr.io/genevamdm \ - $METRIC_ENDPOINT_ARG - - show_mdm_container -} - -show_mdm_container(){ - docker ps -a \ - --format "table {{.ID}}\t{{.Names}}\t{{.Networks}}\t{{.State}}\t{{.CreatedAt}}\t{{.Image}}" \ - -f name=$CONTAINER_NAME -} - -stop_mdm_container(){ - show_mdm_container - docker stop $CONTAINER_NAME - docker rm -f $CONTAINER_NAME - show_mdm_container -} - -upload_cluster_setup_metrics(){ - MDM_ACCOUNT="${MDM_ACCOUNT:-$(cat mdm_account.txt )}" - MDM_NAMESPACE="${MDM_NAMESPACE:-$(cat mdm_namespace.txt)}" - METRIC_NAME="${METRIC_NAME:-GithubWorkflowClusterSetup}" - VALUE="${VALUE:-1}" - - for i in $(seq 1 $REPEAT); do - echo '{"Account":"'${MDM_ACCOUNT}'","Namespace":"'${MDM_NAMESPACE}'","Metric":"'${METRIC_NAME}'", "Dims": { "Repository":"'${REPOSITORY}'", "Workflow":"'${WORKFLOW}'"}}:'${VALUE}'|g' | socat -t 1 - UDP-SENDTO:127.0.0.1:${STATSD_PORT} - sleep 60 - done - -} - -upload_test_result_metrics(){ - MDM_ACCOUNT="${MDM_ACCOUNT:-$(cat mdm_account.txt )}" - MDM_NAMESPACE="${MDM_NAMESPACE:-$(cat mdm_namespace.txt)}" - METRIC_HEARTBEAT_NAME="${METRIC_HEARTBEAT_NAME:-GithubWorkflowHeartBeat}" - METRIC_NAME="${METRIC_NAME:-GithubWorkflowTestResult}" - - jobs=$(grep "\[JobSubmission\]" $RESULT_FILE) - echo "Found $(echo "$jobs"| wc -l) jobs" - - for i in $(seq 1 $REPEAT); do - # Report heartbeat - VALUE=100 - echo '{"Account":"'${MDM_ACCOUNT}'","Namespace":"'${MDM_NAMESPACE}'","Metric":"'${METRIC_HEARTBEAT_NAME}'", "Dims": { "Repository":"'${REPOSITORY}'", "Workflow":"'${WORKFLOW}'"}}:'${VALUE}'|g' | socat -t 1 - UDP-SENDTO:127.0.0.1:${STATSD_PORT} - - while IFS= read -r job; do - job=$(echo $job| awk '{print $2}') - jobstatus=$(grep "\[JobStatus\]" $RESULT_FILE | grep $job | awk '{print $3}') - echo "Report metrics for job: $job status: $jobstatus" - - VALUE=0 - if [ "${jobstatus}" == "${JOB_STATUS_COMPLETED}" ]; then - VALUE=100 - fi - - # Report test result - echo '{"Account":"'${MDM_ACCOUNT}'","Namespace":"'${MDM_NAMESPACE}'","Metric":"'${METRIC_NAME}'", "Dims": {"Job":"'${job}'", "REPOSITORY":"'${REPOSITORY}'", "Workflow":"'${WORKFLOW}'"}}:'${VALUE}'|g' | socat -t 1 - UDP-SENDTO:127.0.0.1:${STATSD_PORT} - sleep 2 - done <<< $(echo "$jobs") - - sleep 60 - done - -} - - -if [ "$0" = "$BASH_SOURCE" ]; then - $@ -fi diff --git a/.github/workflows/kubernetes-compute-cpu-resources-cleanup.yml b/.github/workflows/kubernetes-compute-cpu-resources-cleanup.yml deleted file mode 100644 index 0e3521c37a..0000000000 --- a/.github/workflows/kubernetes-compute-cpu-resources-cleanup.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: kubernetes-compute-cpu-resources-cleanup -on: - schedule: - - cron: "0 16 * * *" - workflow_dispatch: - inputs: - DELETE_ENDPOINTS: - description: 'Whether to delete endpoints: true or false' - required: true - default: 'false' - CLEANUP_WORKSPACE: - description: 'Whether to delete workspace: true or false' - required: true - default: 'false' - UNINSTALL_EXTENSION: - description: 'Whether to uninstall extension: true or false' - required: true - default: 'true' - CLEANUP_CLUSTER: - description: 'Whether to delete cluster: true or false' - required: true - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - VM_SKU: - description: 'The VM SKU of the cluster' - required: false - default: 'Standard_D4s_v3' - AKS_CLUSTER_PREFIX: - description: 'The prefix of the cluster' - required: false - default: 'amlarc-aks' - -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - VM_SKU: Standard_D4s_v3 - MIN_COUNT: 5 - COMPUTE: "cpu-cluster" - CLUSTER_TYPE: managedClusters - - DELETE_ENDPOINTS: true - CLEANUP_WORKSPACE: false - UNINSTALL_EXTENSION: true - CLEANUP_CLUSTER: false - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "DELETE_ENDPOINTS=${{ github.event.inputs.DELETE_ENDPOINTS }}" | tee -a $GITHUB_ENV - echo "CLEANUP_WORKSPACE=${{ github.event.inputs.CLEANUP_WORKSPACE }}" | tee -a $GITHUB_ENV - echo "UNINSTALL_EXTENSION=${{ github.event.inputs.UNINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV - echo "CLEANUP_CLUSTER=${{ github.event.inputs.CLEANUP_CLUSTER }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - echo "VM_SKU=${{ github.event.inputs.VM_SKU }}" | tee -a $GITHUB_ENV - echo "AKS_CLUSTER_PREFIX=${{ github.event.inputs.AKS_CLUSTER_PREFIX }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # cleanup resources - - name: delete_endpoints - if: ${{ always() }} - run: | - if [ "$DELETE_ENDPOINTS" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_endpoints - fi - timeout-minutes: 60 - - name: delete_compute - if: ${{ always() }} - run: | - if [ "$CLEANUP_WORKSPACE" == 'true' ] || [ "$UNINSTALL_EXTENSION" == 'true' ] ; then - bash .github/kubernetes-compute/tool.sh delete_compute || true - fi - timeout-minutes: 60 - - name: delete_workspace - if: ${{ always() }} - run: | - if [ "$CLEANUP_WORKSPACE" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_workspace - fi - timeout-minutes: 60 - - name: delete_extension - if: ${{ always() }} - run: | - if [ "$UNINSTALL_EXTENSION" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_extension - fi - timeout-minutes: 60 - - name: delete_cluster - if: ${{ always() }} - run: | - if [ "$CLEANUP_CLUSTER" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_aks - fi - timeout-minutes: 60 - - - name: cleanup terminating pods - if: ${{ always() }} - run: | - set +e - bash .github/kubernetes-compute/tool.sh get_kubeconfig - for i in $(kubectl get pod | grep Terminating | awk '{print $1}') ; do echo $i ; kubectl get pod $i; kubectl delete pod $i --force; done - timeout-minutes: 60 - - diff --git a/.github/workflows/kubernetes-compute-cpu-resources-setup.yml b/.github/workflows/kubernetes-compute-cpu-resources-setup.yml deleted file mode 100644 index 7f1ec29c3e..0000000000 --- a/.github/workflows/kubernetes-compute-cpu-resources-setup.yml +++ /dev/null @@ -1,127 +0,0 @@ -name: kubernetes-compute-cpu-resources-setup -on: - schedule: - - cron: "0 17 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - RELEASE_TRAIN: - description: 'Release version: experimental, staging or stable' - required: false - default: 'stable' - REINSTALL_EXTENSION: - description: 'Whether to reinstall extension: true or false' - required: false - default: 'false' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - EXTENSION_VERSION: - description: 'The version of k8s-extension' - required: false - default: '' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - VM_SKU: - description: 'The VM SKU of the cluster' - required: false - default: 'Standard_D4s_v3' - AKS_CLUSTER_PREFIX: - description: 'The prefix of the cluster' - required: false - default: 'amlarc-aks' - CPU_INSTANCE_TYPE: - description: 'cpu instance type' - required: false - default: '2 4Gi' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - LOCATION: eastus - WORKSPACE: amlarc-githubtest-ws - VM_SKU: Standard_D4s_v3 - MIN_COUNT: 5 - AKS_CLUSTER_PREFIX: amlarc-aks - CLUSTER_TYPE: managedClusters - RELEASE_TRAIN: stable - COMPUTE: "cpu-cluster" - EXTENSION_SETTINGS: "enableTraining=True enableInference=True inferenceRouterServiceType=loadBalancer allowInsecureConnections=True" - REINSTALL_EXTENSION: false - EXTENSION_VERSION: "" - CPU_INSTANCE_TYPE: "2 4Gi" - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-cpu-resources-setup.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "RELEASE_TRAIN=${{ github.event.inputs.RELEASE_TRAIN }}" | tee -a $GITHUB_ENV - echo "REINSTALL_EXTENSION=${{ github.event.inputs.REINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "EXTENSION_VERSION=${{ github.event.inputs.EXTENSION_VERSION }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - echo "VM_SKU=${{ github.event.inputs.VM_SKU }}" | tee -a $GITHUB_ENV - echo "AKS_CLUSTER_PREFIX=${{ github.event.inputs.AKS_CLUSTER_PREFIX }}" | tee -a $GITHUB_ENV - echo "CPU_INSTANCE_TYPE=${{ github.event.inputs.CPU_INSTANCE_TYPE }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # provision resources - - name: setup_aks - run: bash .github/kubernetes-compute/tool.sh setup_aks - timeout-minutes: 30 - - name: install_extension - run: | - if [ "$EXTENSION_VERSION" == "" ]; then - bash .github/kubernetes-compute/tool.sh install_extension - else - bash .github/kubernetes-compute/tool.sh install_extension --version $EXTENSION_VERSION - fi - timeout-minutes: 30 - - name: setup_workspace - run: bash .github/kubernetes-compute/tool.sh setup_workspace - timeout-minutes: 30 - - name: setup_compute - run: | - bash .github/kubernetes-compute/tool.sh setup_compute - timeout-minutes: 30 - - name: setup_instance_type - run: | - bash .github/kubernetes-compute/tool.sh get_kubeconfig - bash .github/kubernetes-compute/tool.sh setup_instance_type defaultinstancetype $CPU_INSTANCE_TYPE - bash .github/kubernetes-compute/tool.sh setup_instance_type cpu $CPU_INSTANCE_TYPE - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_cluster_setup_metrics - fi - timeout-minutes: 120 \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-gpu-resources-cleanup.yml b/.github/workflows/kubernetes-compute-gpu-resources-cleanup.yml deleted file mode 100644 index c3f72e7d95..0000000000 --- a/.github/workflows/kubernetes-compute-gpu-resources-cleanup.yml +++ /dev/null @@ -1,124 +0,0 @@ -name: kubernetes-compute-gpu-resources-cleanup -on: - schedule: - - cron: "0 16 * * *" - workflow_dispatch: - inputs: - DELETE_ENDPOINTS: - description: 'Whether to delete endpoints: true or false' - required: true - default: 'false' - CLEANUP_WORKSPACE: - description: 'Whether to delete workspace: true or false' - required: true - default: 'false' - UNINSTALL_EXTENSION: - description: 'Whether to uninstall extension: true or false' - required: true - default: 'true' - CLEANUP_CLUSTER: - description: 'Whether to delete cluster: true or false' - required: true - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - VM_SKU: - description: 'The VM SKU of the cluster' - required: false - default: 'STANDARD_NC12' - AKS_CLUSTER_PREFIX: - description: 'The prefix of the cluster' - required: false - default: 'amlarc-aks' - -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - VM_SKU: STANDARD_NC12 - COMPUTE: "gpu-cluster" - CLUSTER_TYPE: managedClusters - - DELETE_ENDPOINTS: true - CLEANUP_WORKSPACE: false - UNINSTALL_EXTENSION: true - CLEANUP_CLUSTER: false - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "DELETE_ENDPOINTS=${{ github.event.inputs.DELETE_ENDPOINTS }}" | tee -a $GITHUB_ENV - echo "CLEANUP_WORKSPACE=${{ github.event.inputs.CLEANUP_WORKSPACE }}" | tee -a $GITHUB_ENV - echo "UNINSTALL_EXTENSION=${{ github.event.inputs.UNINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV - echo "CLEANUP_CLUSTER=${{ github.event.inputs.CLEANUP_CLUSTER }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - echo "VM_SKU=${{ github.event.inputs.VM_SKU }}" | tee -a $GITHUB_ENV - echo "AKS_CLUSTER_PREFIX=${{ github.event.inputs.AKS_CLUSTER_PREFIX }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # cleanup resources - - name: delete_endpoints - if: ${{ always() }} - run: | - if [ "$DELETE_ENDPOINTS" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_endpoints - fi - timeout-minutes: 60 - - name: delete_compute - if: ${{ always() }} - run: | - if [ "$CLEANUP_WORKSPACE" == 'true' ] || [ "$UNINSTALL_EXTENSION" == 'true' ] ; then - bash .github/kubernetes-compute/tool.sh delete_compute || true - COMPUTE="cpu-cluster-lg" bash .github/kubernetes-compute/tool.sh delete_compute || true - fi - timeout-minutes: 60 - - name: delete_workspace - if: ${{ always() }} - run: | - if [ "$CLEANUP_WORKSPACE" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_workspace - fi - timeout-minutes: 60 - - name: delete_extension - if: ${{ always() }} - run: | - if [ "$UNINSTALL_EXTENSION" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_extension - fi - timeout-minutes: 60 - - name: delete_cluster - if: ${{ always() }} - run: | - if [ "$CLEANUP_CLUSTER" == 'true' ]; then - bash .github/kubernetes-compute/tool.sh delete_aks - fi - timeout-minutes: 60 - - - name: cleanup terminating pods - if: ${{ always() }} - run: | - set +e - bash .github/kubernetes-compute/tool.sh get_kubeconfig - for i in $(kubectl get pod | grep Terminating | awk '{print $1}') ; do echo $i ; kubectl get pod $i; kubectl delete pod $i --force; done - timeout-minutes: 60 - - diff --git a/.github/workflows/kubernetes-compute-gpu-resources-setup.yml b/.github/workflows/kubernetes-compute-gpu-resources-setup.yml deleted file mode 100644 index 06635a2d24..0000000000 --- a/.github/workflows/kubernetes-compute-gpu-resources-setup.yml +++ /dev/null @@ -1,140 +0,0 @@ -name: kubernetes-compute-gpu-resources-setup -on: - schedule: - - cron: "0 17 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - RELEASE_TRAIN: - description: 'Release version: experimental, staging or stable' - required: false - default: 'stable' - REINSTALL_EXTENSION: - description: 'Whether to reinstall extension: true or false' - required: false - default: 'false' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - EXTENSION_VERSION: - description: 'The version of k8s-extension' - required: false - default: '' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - VM_SKU: - description: 'The VM SKU of the cluster' - required: false - default: 'STANDARD_NC12' - AKS_CLUSTER_PREFIX: - description: 'The prefix of the cluster' - required: false - default: 'amlarc-aks' - CPU_INSTANCE_TYPE: - description: 'cpu instance type' - required: false - default: '4 40Gi' - GPU_INSTANCE_TYPE: - description: 'gpu instance type' - required: false - default: '4 40Gi 2' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - LOCATION: eastus - WORKSPACE: amlarc-githubtest-ws - VM_SKU: STANDARD_NC12 - MIN_COUNT: 4 - CLUSTER_TYPE: managedClusters - RELEASE_TRAIN: stable - COMPUTE: "gpu-cluster" - EXTENSION_SETTINGS: "enableTraining=True enableInference=True inferenceRouterServiceType=loadBalancer allowInsecureConnections=True installNvidiaDevicePlugin=True installDcgmExporter=True" - REINSTALL_EXTENSION: false - EXTENSION_VERSION: "" - AKS_CLUSTER_PREFIX: "amlarc-aks" - CPU_INSTANCE_TYPE: "4 40Gi" - GPU_INSTANCE_TYPE: "4 40Gi 2" - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-gpu-resources-setup.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "RELEASE_TRAIN=${{ github.event.inputs.RELEASE_TRAIN }}" | tee -a $GITHUB_ENV - echo "REINSTALL_EXTENSION=${{ github.event.inputs.REINSTALL_EXTENSION }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "EXTENSION_VERSION=${{ github.event.inputs.EXTENSION_VERSION }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - echo "VM_SKU=${{ github.event.inputs.VM_SKU }}" | tee -a $GITHUB_ENV - echo "AKS_CLUSTER_PREFIX=${{ github.event.inputs.AKS_CLUSTER_PREFIX }}" | tee -a $GITHUB_ENV - echo "CPU_INSTANCE_TYPE=${{ github.event.inputs.CPU_INSTANCE_TYPE }}" | tee -a $GITHUB_ENV - echo "GPU_INSTANCE_TYPE=${{ github.event.inputs.GPU_INSTANCE_TYPE }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # provision resources - - name: setup_aks - run: bash .github/kubernetes-compute/tool.sh setup_aks - timeout-minutes: 30 - - name: install_extension - run: | - if [ "$EXTENSION_VERSION" == "" ]; then - bash .github/kubernetes-compute/tool.sh install_extension - else - bash .github/kubernetes-compute/tool.sh install_extension --version $EXTENSION_VERSION - fi - timeout-minutes: 30 - - name: setup_workspace - run: bash .github/kubernetes-compute/tool.sh setup_workspace - timeout-minutes: 30 - - name: setup_compute - run: | - bash .github/kubernetes-compute/tool.sh setup_compute - timeout-minutes: 30 - - name: setup_compute - run: | - COMPUTE="cpu-cluster-lg" bash .github/kubernetes-compute/tool.sh setup_compute - COMPUTE="spark31" bash .github/kubernetes-compute/tool.sh setup_compute - timeout-minutes: 30 - - name: setup_instance_type - run: | - bash .github/kubernetes-compute/tool.sh get_kubeconfig - bash .github/kubernetes-compute/tool.sh setup_instance_type defaultinstancetype $GPU_INSTANCE_TYPE - bash .github/kubernetes-compute/tool.sh setup_instance_type cpu $CPU_INSTANCE_TYPE - bash .github/kubernetes-compute/tool.sh setup_instance_type gpu $GPU_INSTANCE_TYPE - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_cluster_setup_metrics - fi - timeout-minutes: 120 - diff --git a/.github/workflows/kubernetes-compute-sdk-endpoints.yml b/.github/workflows/kubernetes-compute-sdk-endpoints.yml deleted file mode 100644 index 5946736ca8..0000000000 --- a/.github/workflows/kubernetes-compute-sdk-endpoints.yml +++ /dev/null @@ -1,191 +0,0 @@ -name: kubernetes-compute-sdk-endpoints -on: - schedule: - - cron: "0 21 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^sdk-endpoints.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - pull_request: - branches: - - main - paths: - - '.github/workflows/kubernetes-compute-sdk-endpoints.yml' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - LOCATION: eastus - - JOB_SELECTOR: '^sdk-endpoints.*yml$' - JOB_FILTER: '' - JOB_LIST_FILE: sdk-endpoints.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-sdk-endpoints.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - - name: check out repo - uses: actions/checkout@v2 - - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: pip install notebook reqs - run: pip install -r sdk/dev-requirements.txt - - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: setup SDK - run: bash setup.sh - working-directory: sdk - continue-on-error: true - - - name: setup CLI - run: bash setup.sh - working-directory: cli - continue-on-error: true - - - name: collect jobs - run: | - python .github/kubernetes-compute/sdk_testcase_collector.py -o "$JOB_LIST_FILE" -r "$JOB_SELECTOR" - timeout-minutes: 30 - - - name: run jobs - run: | - for job in $(cat $JOB_LIST_FILE); do - if [[ "$job" = *"ipynb" ]]; then - echo "Run job: $job" - - FOLDER=$(dirname "$job") - FILE=$(basename "$job") - FOLDERBACK=$(echo $FOLDER | sed -e "s/[a-zA-Z0-9\-_-]*\//\.\.\//g") - FOLDERBACK=$(echo $FOLDERBACK | sed -r "s/\/[a-zA-Z0-9\-_-]+/\/\.\./g") - - echo "$FOLDER|$FILE|$FOLDERBACK" - - cd $FOLDER - sed -i -e "s//$SUBSCRIPTION/g" $FILE - sed -i -e "s//$RESOURCE_GROUP/g" $FILE - sed -i -e "s//$WORKSPACE/g" $FILE - sed -i -e "s//amlarc-inference/g" $FILE - sed -i -e "s//inferencecompute/g" $FILE - sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" $FILE - OUTPUTJOB=$(echo $FILE | sed -e "s/.ipynb/.output.ipynb/g") - papermill -k python $FILE $OUTPUTJOB & - - sleep 60 - cd $FOLDERBACK - else - echo "Found invalid job: $job" - fi - done - - wait - timeout-minutes: 300 - - - name: check_jobs - if: ${{ always() }} - run: | - python .github/kubernetes-compute/papermill_count_failed_case.py -i "$JOB_LIST_FILE" - timeout-minutes: 30 - - # report metrics - - name: download_metrics_dependency - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh install_mdm_dependency - fi - timeout-minutes: 30 - - name: start_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - bash .github/kubernetes-compute/tool.sh start_mdm_container - fi - timeout-minutes: 30 - - name: report_failure_metrics - if: ${{ failure() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Failed - export job="kubernetes-compute-sdk-endpoints" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: report_succes_metrics - if: ${{ success() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Completed - export job="kubernetes-compute-sdk-endpoints" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: stop_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh stop_mdm_container - fi - timeout-minutes: 30 - - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: component - path: sdk/endpoints \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml b/.github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml deleted file mode 100644 index dfe150813a..0000000000 --- a/.github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml +++ /dev/null @@ -1,190 +0,0 @@ -name: kubernetes-compute-sdk-jobs-automl-standalone-jobs -on: - schedule: - - cron: "0 22 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^sdk-jobs-automl-standalone-jobs.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - pull_request: - branches: - - main - paths: - - '.github/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - LOCATION: eastus - - JOB_SELECTOR: '^sdk-jobs-automl-standalone-jobs.*yml$' - JOB_FILTER: '' - JOB_LIST_FILE: sdk-jobs-automl-standalone-jobs.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-sdk-jobs-automl-standalone-jobs.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - - name: check out repo - uses: actions/checkout@v2 - - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: pip install notebook reqs - run: pip install -r sdk/dev-requirements.txt - - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: setup SDK - run: bash setup.sh - working-directory: sdk - continue-on-error: true - - - name: setup CLI - run: bash setup.sh - working-directory: cli - continue-on-error: true - - - name: collect jobs - run: | - python .github/kubernetes-compute/sdk_testcase_collector.py -o "$JOB_LIST_FILE" -r "$JOB_SELECTOR" - timeout-minutes: 30 - - - name: run jobs - run: | - for job in $(cat $JOB_LIST_FILE); do - if [[ "$job" = *"ipynb" ]]; then - echo "Run job: $job" - - FOLDER=$(dirname "$job") - FILE=$(basename "$job") - FOLDERBACK=$(echo $FOLDER | sed -e "s/[a-zA-Z0-9\-_-]*\//\.\.\//g") - FOLDERBACK=$(echo $FOLDERBACK | sed -r "s/\/[a-zA-Z0-9\-_-]+/\/\.\./g") - - echo "$FOLDER|$FILE|$FOLDERBACK" - - cd $FOLDER - sed -i -e "s//$SUBSCRIPTION/g" $FILE - sed -i -e "s//$RESOURCE_GROUP/g" $FILE - sed -i -e "s//$WORKSPACE/g" $FILE - sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" $FILE - OUTPUTJOB=$(echo $FILE | sed -e "s/.ipynb/.output.ipynb/g") - papermill -k python $FILE $OUTPUTJOB & - - sleep 900 - - cd $FOLDERBACK - else - echo "Found invalid job: $job" - fi - done - - wait - timeout-minutes: 300 - - - name: check_jobs - if: ${{ always() }} - run: | - python .github/kubernetes-compute/papermill_count_failed_case.py -i "$JOB_LIST_FILE" - timeout-minutes: 30 - - # report metrics - - name: download_metrics_dependency - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh install_mdm_dependency - fi - timeout-minutes: 30 - - name: start_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - bash .github/kubernetes-compute/tool.sh start_mdm_container - fi - timeout-minutes: 30 - - name: report_failure_metrics - if: ${{ failure() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Failed - export job="kubernetes-compute-sdk-jobs-automl-standalone-jobs" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: report_succes_metrics - if: ${{ success() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Completed - export job="kubernetes-compute-sdk-jobs-automl-standalone-jobs" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: stop_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh stop_mdm_container - fi - timeout-minutes: 30 - - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: component - path: sdk/jobs/automl-standalone-jobs \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml b/.github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml deleted file mode 100644 index bc96f4be74..0000000000 --- a/.github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml +++ /dev/null @@ -1,192 +0,0 @@ -name: kubernetes-compute-sdk-jobs-pipeline -on: - schedule: - - cron: "0 20 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^sdk-jobs-pipeline.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - pull_request: - branches: - - main - paths: - - '.github/workflows/kubernetes-compute-sdk-jobs-pipeline.yml' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - LOCATION: eastus - - JOB_SELECTOR: '^sdk-jobs-pipeline.*yml$' - JOB_FILTER: '' - JOB_LIST_FILE: sdk-jobs-pipeline.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - REPOSITORY: https://github.com/Azure/azureml-examples - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-sdk-jobs-pipeline.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - - name: check out repo - uses: actions/checkout@v2 - - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: pip install notebook reqs - run: pip install -r sdk/dev-requirements.txt - - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: setup SDK - run: bash setup.sh - working-directory: sdk - continue-on-error: true - - - name: setup CLI - run: bash setup.sh - working-directory: cli - continue-on-error: true - - - name: collect jobs - run: | - python .github/kubernetes-compute/sdk_testcase_collector.py -o "$JOB_LIST_FILE" -r "$JOB_SELECTOR" - timeout-minutes: 30 - - - name: run jobs - run: | - mkdir .azureml - echo '{"subscription_id": "6560575d-fa06-4e7d-95fb-f962e74efd7a", "resource_group": "azureml-examples", "workspace_name": "amlarc-githubtest-ws"}' > .azureml/config.json - for job in $(cat $JOB_LIST_FILE); do - if [[ "$job" = *"ipynb" ]]; then - echo "Run job: $job" - - FOLDER=$(dirname "$job") - FILE=$(basename "$job") - FOLDERBACK=$(echo $FOLDER | sed -e "s/[a-zA-Z0-9\-_-]*\//\.\.\//g") - FOLDERBACK=$(echo $FOLDERBACK | sed -r "s/\/[a-zA-Z0-9\-_-]+/\/\.\./g") - - echo "$FOLDER|$FILE|$FOLDERBACK" - - cd $FOLDER - sed -i -e "s//$SUBSCRIPTION/g" $FILE - sed -i -e "s//$RESOURCE_GROUP/g" $FILE - sed -i -e "s//$WORKSPACE/g" $FILE - sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" $FILE - sed -i "s/@pipeline(/&force_rerun=True,/" $FILE - OUTPUTJOB=$(echo $FILE | sed -e "s/.ipynb/.output.ipynb/g") - papermill -k python $FILE $OUTPUTJOB & - - sleep 60 - cd $FOLDERBACK - else - echo "Found invalid job: $job" - fi - done - - wait - timeout-minutes: 300 - - - name: check_jobs - if: ${{ always() }} - run: | - python .github/kubernetes-compute/papermill_count_failed_case.py -i "$JOB_LIST_FILE" - timeout-minutes: 30 - - # report metrics - - name: download_metrics_dependency - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh install_mdm_dependency - fi - timeout-minutes: 30 - - name: start_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - bash .github/kubernetes-compute/tool.sh start_mdm_container - fi - timeout-minutes: 30 - - name: report_failure_metrics - if: ${{ failure() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Failed - export job="kubernetes-compute-sdk-jobs-pipeline" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: report_succes_metrics - if: ${{ success() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Completed - export job="kubernetes-compute-sdk-jobs-pipeline" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: stop_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh stop_mdm_container - fi - timeout-minutes: 30 - - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: component - path: sdk/jobs/pipelines \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-sdk-jobs-single-step.yml b/.github/workflows/kubernetes-compute-sdk-jobs-single-step.yml deleted file mode 100644 index 44add30123..0000000000 --- a/.github/workflows/kubernetes-compute-sdk-jobs-single-step.yml +++ /dev/null @@ -1,189 +0,0 @@ -name: kubernetes-compute-sdk-jobs-single-step -on: - schedule: - - cron: "0 19 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^sdk-jobs-single-step.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - pull_request: - branches: - - main - paths: - - '.github/workflows/kubernetes-compute-sdk-jobs-single-step.yml' -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - WORKSPACE: amlarc-githubtest-ws - LOCATION: eastus - - JOB_SELECTOR: '^sdk-jobs-single-step.*yml$' - JOB_FILTER: '' - JOB_LIST_FILE: sdk-jobs-single-step.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - - FILE_TICKET: true - KEY_VAULT_NAME: amlarcgithubworkflowkv - GITHUB_REPO: https://github.com/Azure/azureml-examples - WORKFLOW_URL: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-sdk-jobs-single-step.yml - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - - name: check out repo - uses: actions/checkout@v2 - - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: pip install notebook reqs - run: pip install -r sdk/dev-requirements.txt - - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: setup SDK - run: bash setup.sh - working-directory: sdk - continue-on-error: true - - - name: setup CLI - run: bash setup.sh - working-directory: cli - continue-on-error: true - - - name: collect jobs - run: | - python .github/kubernetes-compute/sdk_testcase_collector.py -o "$JOB_LIST_FILE" -r "$JOB_SELECTOR" - timeout-minutes: 30 - - - name: run jobs - run: | - for job in $(cat $JOB_LIST_FILE); do - if [[ "$job" = *"ipynb" ]]; then - echo "Run job: $job" - - FOLDER=$(dirname "$job") - FILE=$(basename "$job") - FOLDERBACK=$(echo $FOLDER | sed -e "s/[a-zA-Z0-9\-_-]*\//\.\.\//g") - FOLDERBACK=$(echo $FOLDERBACK | sed -r "s/\/[a-zA-Z0-9\-_-]+/\/\.\./g") - - echo "$FOLDER|$FILE|$FOLDERBACK" - - cd $FOLDER - sed -i -e "s//$SUBSCRIPTION/g" $FILE - sed -i -e "s//$RESOURCE_GROUP/g" $FILE - sed -i -e "s//$WORKSPACE/g" $FILE - sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" $FILE - OUTPUTJOB=$(echo $FILE | sed -e "s/.ipynb/.output.ipynb/g") - papermill -k python $FILE $OUTPUTJOB & - - sleep 60 - cd $FOLDERBACK - else - echo "Found invalid job: $job" - fi - done - - wait - timeout-minutes: 300 - - - name: check_jobs - if: ${{ always() }} - run: | - python .github/kubernetes-compute/papermill_count_failed_case.py -i "$JOB_LIST_FILE" - timeout-minutes: 30 - - # report metrics - - name: download_metrics_dependency - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh install_mdm_dependency - fi - timeout-minutes: 30 - - name: start_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - # download certificates - export METRIC_ENDPOINT_NAME=METRIC-ENDPOINT-PROD - export MDM_ACCOUNT_NAME=MDM-ACCOUNT-PROD - export MDM_NAMESPACE_NAME=MDM-NAMESPACE-PROD - export KEY_PEM_NAME=AMLARC-KEY-PEM - export CERT_PEM_NAME=AMLARC-CERT-PEM - bash .github/kubernetes-compute/tool.sh download_metrics_info - bash .github/kubernetes-compute/tool.sh start_mdm_container - fi - timeout-minutes: 30 - - name: report_failure_metrics - if: ${{ failure() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Failed - export job="kubernetes-compute-sdk-jobs-single-step" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: report_succes_metrics - if: ${{ success() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - export jobstatus=Completed - export job="kubernetes-compute-sdk-jobs-single-step" - bash .github/kubernetes-compute/tool.sh report_inference_metrics - fi - timeout-minutes: 30 - - name: stop_mdm - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh stop_mdm_container - fi - timeout-minutes: 30 - - - name: upload notebook's working folder as an artifact - if: ${{ always() }} - uses: actions/upload-artifact@v2 - with: - name: component - path: sdk/jobs/single-step diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml deleted file mode 100644 index 5a9f1c9963..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml +++ /dev/null @@ -1,119 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-automl-dynamic-data -on: - schedule: - - cron: "0 0 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '300m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-automl-dynamic-data.txt - TIMEOUT: '300m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-automl-dynamic-data.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - pip install azure-ai-textanalytics - pip install azure-identity - pip install azure-ai-ml - pip install azure-cli - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - echo "cli/jobs/pipelines/automl/image-instance-segmentation-task-fridge-items-pipeline/pipeline.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/pipelines/automl/image-multiclass-classification-fridge-items-pipeline/pipeline.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/pipelines/automl/image-multilabel-classification-fridge-items-pipeline/pipeline.yml" | tee -a $JOB_LIST_FILE - - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/cli-automl-image-classification-multiclass-task-fridge-items.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/cli-automl-image-classification-multiclass-task-fridge-items-automode.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/cli-automl-image-instance-segmentation-task-fridge-items.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/cli-automl-image-instance-segmentation-task-fridge-items-automode.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/cli-automl-image-classification-multilabel-task-fridge-items.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilabel-task-fridge-items/cli-automl-image-classification-multilabel-task-fridge-items-automode.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/cli-automl-image-object-detection-task-fridge-items.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/cli-automl-image-object-detection-task-fridge-items-automode.yml" | tee -a $JOB_LIST_FILE - timeout-minutes: 30 - - name: prepare data - run: | - ROOT_DIR=$(pwd) - for job in $(cat $JOB_LIST_FILE); do - cd $ROOT_DIR - if [[ "$job" = *"yml" ]]; then - echo "Prepare data for job: $job" - JOB_SPEC_FILE=$(basename $job) - JOB_DIR=$(dirname $job) - - cd $JOB_DIR - sed -i -e "s/from azure.identity import InteractiveBrowserCredential/from azureml.core.authentication import AzureCliAuthentication/g" prepare_data.py - sed -i -e "s/credential = InteractiveBrowserCredential()/credential = AzureCliAuthentication()/g" prepare_data.py - - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - - sleep 30 - else - echo "Found invalid job: $job" - fi - done - timeout-minutes: 300 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - # report metrics - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-automl.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-automl.yml deleted file mode 100644 index 7da1733b0e..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-automl.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-automl -on: - schedule: - - cron: "0 22 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-automl.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: 'automl-forecasting-task-github-dau|cli-automl-forecasting-task-bike-share|multiclass-task-fridge-items|segmentation-task-fridge-items|multilabel-task-fridge-items|detection-task-fridge-items|spark|java' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-automl.*yml$|cli-jobs-basics-hello-automl-hello-automl-job-basic.yml' - JOB_FILTER: 'automl-forecasting-task-github-dau|cli-automl-forecasting-task-bike-share|multiclass-task-fridge-items|segmentation-task-fridge-items|multilabel-task-fridge-items|detection-task-fridge-items|spark|java' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-automl.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-automl.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - bash .github/kubernetes-compute/tool.sh collect_jobs_from_workflows "$JOB_LIST_FILE" "$JOB_SELECTOR" "$JOB_FILTER" - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - # report metrics - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 \ No newline at end of file diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml deleted file mode 100644 index f06d35f58e..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml +++ /dev/null @@ -1,85 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-bad-cases -on: - schedule: - - cron: "0 4 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-bad-cases.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 0 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-bad-cases.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - echo "cli/jobs/single-step/pytorch/cifar-distributed/job.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/pipelines/automl/image-object-detection-task-fridge-items-pipeline/pipeline.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-forecasting-bike-share/cli-automl-forecasting-task-bike-share.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/pipelines-with-components/image_classification_with_densenet/pipeline.yml" | tee -a $JOB_LIST_FILE - echo "cli/jobs/automl-standalone-jobs/cli-automl-forecasting-task-github-dau/cli-automl-forecasting-task-github-dau.yml" | tee -a $JOB_LIST_FILE - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-basics.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-basics.yml deleted file mode 100644 index 087a09cc8a..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-basics.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-basics -on: - schedule: - - cron: "0 20 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-basics.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: 'java' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-basics.*yml$' - JOB_FILTER: 'java' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-basics.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-basics.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - bash .github/kubernetes-compute/tool.sh collect_jobs_from_workflows "$JOB_LIST_FILE" "$JOB_SELECTOR" "$JOB_FILTER" - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 - diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml deleted file mode 100644 index 433130eef4..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml +++ /dev/null @@ -1,93 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-pipelines -on: - schedule: - - cron: "0 21 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-pipelines.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: '4b_datastore_datapath_uri|image_classification_with_densenet|spark|image-instance-segmentation-task-fridge-items-pipeline|image-multiclass-classification-fridge-items-pipeline|image-multilabel-classification-fridge-items-pipeline|image-object-detection-task-fridge-items-pipeline' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-pipelines.*yml$' - JOB_FILTER: '4b_datastore_datapath_uri|image_classification_with_densenet|spark|image-instance-segmentation-task-fridge-items-pipeline|image-multiclass-classification-fridge-items-pipeline|image-multilabel-classification-fridge-items-pipeline|image-object-detection-task-fridge-items-pipeline' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-pipelines.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-pipelines.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - bash .github/kubernetes-compute/tool.sh collect_jobs_from_workflows "$JOB_LIST_FILE" "$JOB_SELECTOR" "$JOB_FILTER" - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml deleted file mode 100644 index 58a269d535..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml +++ /dev/null @@ -1,94 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-single-step-spark -on: - schedule: - - cron: "0 2 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-single-step-spark.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: 'java' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '300m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-single-step-spark.*yml$' - JOB_FILTER: 'java' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-single-step-spark.txt - TIMEOUT: '300m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-single-step-spark.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - echo "cli/jobs/single-step/spark/nyctaxi/job.yml" | tee -a $JOB_LIST_FILE - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 360 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - # report metrics - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-training-cli-jobs-single-step.yml b/.github/workflows/kubernetes-compute-training-cli-jobs-single-step.yml deleted file mode 100644 index c2d66cbea8..0000000000 --- a/.github/workflows/kubernetes-compute-training-cli-jobs-single-step.yml +++ /dev/null @@ -1,93 +0,0 @@ -name: kubernetes-compute-training-cli-jobs-single-step -on: - schedule: - - cron: "0 19 * * *" - workflow_dispatch: - inputs: - TEST_REGION: - description: 'Resource Region' - required: false - default: 'eastus' - JOB_SELECTOR: - description: 'Job selector used with grep command to select job workflows' - required: false - default: '^cli-jobs-single-step.*yml$' - JOB_FILTER: - description: 'Job filter used with grep command to filter out job workflows' - required: false - default: 'spark|java|sweep|pytorch-cifar-distributed-job' - TIMEOUT: - description: 'Timeout of a single job' - required: false - default: '120m' - FILE_TICKET: - description: 'Whether to file icm ticket: true or false' - required: false - default: 'false' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' -jobs: - test: - runs-on: ubuntu-latest - env: - JOB_SELECTOR: '^cli-jobs-single-step.*yml$' - JOB_FILTER: 'spark|java|sweep|pytorch-cifar-distributed-job' - JOB_LIST_FILE: kubernetes-compute-training-cli-jobs-single-step.txt - TIMEOUT: '120m' - MIN_SUCCESS_NUM: 1 - WORKFLOW: https://github.com/Azure/azureml-examples/actions/workflows/kubernetes-compute-training-cli-jobs-single-step.yml - - steps: - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: setup environment - run: | - bash .github/kubernetes-compute/tool.sh set_default_env - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - echo "LOCATION=${{ github.event.inputs.TEST_REGION }}" | tee -a $GITHUB_ENV - echo "JOB_SELECTOR=${{ github.event.inputs.JOB_SELECTOR }}" | tee -a $GITHUB_ENV - echo "JOB_FILTER=${{ github.event.inputs.JOB_FILTER }}" | tee -a $GITHUB_ENV - echo "TIMEOUT=${{ github.event.inputs.TIMEOUT }}" | tee -a $GITHUB_ENV - echo "FILE_TICKET=${{ github.event.inputs.FILE_TICKET }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - fi - bash .github/kubernetes-compute/tool.sh install_tools - set -x +e - bash cli/setup.sh - az version - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - - name: collect jobs - run: | - bash .github/kubernetes-compute/tool.sh collect_jobs_from_workflows "$JOB_LIST_FILE" "$JOB_SELECTOR" "$JOB_FILTER" - timeout-minutes: 30 - - - name: run jobs - run: | - bash .github/kubernetes-compute/tool.sh run_jobs_from_file "$JOB_LIST_FILE" - timeout-minutes: 300 - - - name: check jobs - if: ${{ always() }} - run: | - bash .github/kubernetes-compute/tool.sh count_result - timeout-minutes: 30 - - - name: report_metrics - if: ${{ always() && github.event_name != 'pull_request' }} - run: | - if [ "$FILE_TICKET" == "true" ]; then - bash .github/kubernetes-compute/tool.sh report_metrics upload_test_result_metrics - fi - timeout-minutes: 120 diff --git a/.github/workflows/kubernetes-compute-workspace-setup.yml b/.github/workflows/kubernetes-compute-workspace-setup.yml deleted file mode 100644 index fb7b5db85b..0000000000 --- a/.github/workflows/kubernetes-compute-workspace-setup.yml +++ /dev/null @@ -1,262 +0,0 @@ -name: kubernetes-compute-workspace-setup -on: - schedule: - - cron: "0 0 * * 3" - workflow_dispatch: - inputs: - LOCATION: - description: 'Resource Region' - required: false - default: 'eastus' - WORKSPACE: - description: 'The workspace of the cluster' - required: false - default: 'amlarc-githubtest-ws' - -jobs: - test: - runs-on: ubuntu-latest - env: - SUBSCRIPTION: 6560575d-fa06-4e7d-95fb-f962e74efd7a - RESOURCE_GROUP: azureml-examples - LOCATION: eastus - WORKSPACE: amlarc-githubtest-ws - - steps: - - name: replace env from workflow_dispatch - if: ${{ github.event_name == 'workflow_dispatch' }} - run: | - echo "LOCATION=${{ github.event.inputs.LOCATION }}" | tee -a $GITHUB_ENV - echo "WORKSPACE=${{ github.event.inputs.WORKSPACE }}" | tee -a $GITHUB_ENV - - name: check out repo - uses: actions/checkout@v2 - - name: setup python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: install tools - run: bash .github/kubernetes-compute/tool.sh install_tools - timeout-minutes: 30 - - name: azure login - uses: azure/login@v1 - with: - creds: ${{secrets.AZUREML_CREDENTIALS}} - timeout-minutes: 30 - - # provision resources - - name: setup_workspace - if: ${{ always() }} - run: bash .github/kubernetes-compute/tool.sh setup_workspace - timeout-minutes: 30 - - - name: set configure - if: ${{ always() }} - run: | - az account set --subscription $SUBSCRIPTION - az configure --defaults group=$RESOURCE_GROUP workspace=$WORKSPACE location=$LOCATION - pip install azure-identity - pip install azure-ai-ml - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilablel-task-fridge-items/cli-automl-image-classification-multilabel-task-fridge-items.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilablel-task-fridge-items - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilablel-task-fridge-items/cli-automl-image-classification-multilabel-task-fridge-items-automode.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multilablel-task-fridge-items - timeout-minutes: 60 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/cli-automl-image-classification-multiclass-task-fridge-items.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items/cli-automl-image-classification-multiclass-task-fridge-items-automode.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-classification-multiclass-task-fridge-items - timeout-minutes: 60 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/cli-automl-image-instance-segmentation-task-fridge-items.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items/cli-automl-image-instance-segmentation-task-fridge-items-automode.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-instance-segmentation-task-fridge-items - timeout-minutes: 60 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/cli-automl-image-object-detection-task-fridge-items.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items - timeout-minutes: 30 - continue-on-error: true - - - name: cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items/cli-automl-image-object-detection-task-fridge-items-automode.yml - if: ${{ always() }} - run: | - python prepare_data.py --subscription $SUBSCRIPTION --group $RESOURCE_GROUP --workspace $WORKSPACE - working-directory: cli/jobs/automl-standalone-jobs/cli-automl-image-object-detection-task-fridge-items - timeout-minutes: 60 - continue-on-error: true - - - name: set sampledata dataset - if: ${{ always() }} - run: | - set -x - pip install azure.cli.core - pip install azureml-dataset-runtime - python -c ' - - import sys - from azureml.core.workspace import Workspace - from azureml.core import Dataset - from azureml.core.authentication import AzureCliAuthentication - - cli_auth = AzureCliAuthentication() - ws = Workspace.get(subscription_id=sys.argv[1], - resource_group=sys.argv[2], - name=sys.argv[3], - auth=cli_auth) - datastore = ws.datastores["workspaceblobstore"] - dataset = Dataset.File.from_files(path=[(datastore, "example-data")]) - dataset.register(ws, "sampledata") - - ' "$SUBSCRIPTION" "$RESOURCE_GROUP" "$WORKSPACE" - timeout-minutes: 300 - continue-on-error: true - - - name: create asset for cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components - if: ${{ always() }} - run: | - set -x - az ml component create --file train.yml - az ml component create --file score.yml - az ml component create --file eval.yml - working-directory: cli/jobs/pipelines-with-components/basics/1b_e2e_registered_components - timeout-minutes: 30 - continue-on-error: true - - - - name: setup env for cli/jobs/pipelines-with-components/rai_pipeline_adult_analyse/ - if: ${{ always() }} - run: | - set -x - az ml environment create --file environment/responsibleai-environment.yaml - az ml data create --file data/data_adult_train.yaml - az ml data create --file data/data_adult_test.yaml - working-directory: cli/jobs/pipelines-with-components/rai_pipeline_adult_analyse/ - timeout-minutes: 30 - continue-on-error: true - - - name: Please manually setup uri_file for cli/jobs/pipelines-with-components/basics/4b_datastore_datapath_uri/pipeline.yml - if: ${{ always() }} - run: | - echo Please manually setup uri_file for cli/jobs/pipelines-with-components/basics/4b_datastore_datapath_uri/pipeline.yml - working-directory: cli/jobs/pipelines-with-components/basics/4b_datastore_datapath_uri - timeout-minutes: 30 - continue-on-error: true - - - name: setup_asset /cli/assets/data/local-folder.yml - if: ${{ always() }} - run: az ml data create -f local-folder.yml - working-directory: cli/assets/data - timeout-minutes: 300 - continue-on-error: true - - - name: setup_dataset - if: ${{ always() }} - run: bash create-datasets.sh - working-directory: setup-repo - timeout-minutes: 300 - continue-on-error: true - - - name: download azcopy - if: ${{ always() }} - run: | - set -x - wget https://azcopyvnext.azureedge.net/release20220511/azcopy_linux_amd64_10.15.0.tar.gz - tar zxf azcopy_linux_amd64_10.15.0.tar.gz - cp azcopy_linux_amd64_10.15.0/azcopy . - working-directory: setup-repo - timeout-minutes: 30 - continue-on-error: true - - - name: Please manually run copy-data.sh - if: ${{ always() }} - run: | - # bash copy-data.sh - echo 'Please manually run "bash copy-data.sh" in setup-repo directory' - working-directory: setup-repo - timeout-minutes: 300 - continue-on-error: true - - - name: Setup uri_folder - if: ${{ always() }} - run: | - echo 'Please manually setup local-folder-example uri_folder for cli/jobs/basics/hello-data-uri-folder.yml' - echo 'Please manually setup local-folder-example uri_folder for cli/jobs/pipelines-with-components/basics/4d_data_input/pipeline.yml' - cat << EOF > local_uri_floder.yml - \$schema: https://azuremlschemas.azureedge.net/latest/data.schema.json - name: local-folder-example - description: Dataset created from local folder. - type: uri_folder - path: cli/jobs/pipelines-with-components/basics/4d_data_input/data - EOF - - az ml data create --subscription $SUBSCRIPTION --resource-group $RESOURCE_GROUP --workspace $WORKSPACE -f local_uri_floder.yml - timeout-minutes: 300 - continue-on-error: true - - - name: Setup mltable - if: ${{ always() }} - run: | - echo 'Please manually setup local-folder-example mltable for cli/jobs/basics/hello-dataset.yml' - cat << EOF > local_mltable.yml - \$schema: https://azuremlschemas.azureedge.net/latest/data.schema.json - name: sampledata - description: Dataset created from local folder. - type: mltable - path: cli/jobs/basics/hello-automl - EOF - - # az ml data create --subscription $SUBSCRIPTION --resource-group $RESOURCE_GROUP --workspace $WORKSPACE -f local_mltable.yml - timeout-minutes: 300 - continue-on-error: true - - - name: Setup hello-world.txt - if: ${{ always() }} - run: | - echo 'Please manually setup hello-world.txt for cli/jobs/pipelines-with-components/basics/4b_datastore_datapath_uri/pipeline.yml' - # echo "hello world" > hello-world.txt - # azcopy cp hello-world.txt https://amlarcgistorage7a0860601.blob.core.windows.net/azureml-blobstore-68875c58-4a7d-46e4-bcb9-e17da409f580/azureml/ - - timeout-minutes: 300 - continue-on-error: true - - - - name: over - run: echo over!