diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py
index 0bcfc95adbd6..22478c0ae52b 100644
--- a/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py
+++ b/sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py
@@ -7,6 +7,7 @@
 import hashlib
+import json
 import logging
 import os
 import uuid
 import warnings
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -15,7 +16,17 @@
 from os import PathLike
 from pathlib import Path
 from platform import system
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
 
 from colorama import Fore
 from tqdm import TqdmWarning, tqdm
@@ -56,7 +67,11 @@
 from azure.ai.ml._restclient.v2023_04_01.models import PendingUploadRequestDto
 from azure.ai.ml._utils._pathspec import GitWildMatchPattern, normalize_file
 from azure.ai.ml._utils.utils import convert_windows_path_to_unix, retry, snake_to_camel
-from azure.ai.ml.constants._common import MAX_AUTOINCREMENT_ATTEMPTS, DefaultOpenEncoding, OrderString
+from azure.ai.ml.constants._common import (
+    MAX_AUTOINCREMENT_ATTEMPTS,
+    DefaultOpenEncoding,
+    OrderString,
+)
 from azure.ai.ml.entities._assets.asset import Asset
 from azure.ai.ml.exceptions import (
     AssetPathException,
@@ -247,6 +262,34 @@ def _get_file_hash(filename: Union[str, os.PathLike], _hash: hash_type) -> hash_
     return _hash
 
 
+def delete_two_catalog_files(path: Union[str, os.PathLike]) -> None:
+    """
+    Delete the "catalog.json" and "catalog.json.sig" files located at ``path``, if they exist.
+
+    :param path: Path to the folder that is being prepared for signing
+    :type path: Union[Path, str]
+    :return: None
+    """
+    # catalog.json
+    file_path_json = os.path.join(path, "catalog.json")
+    if os.path.exists(file_path_json):
+        module_logger.warning("%s already exists. Deleting it", file_path_json)
+        os.remove(file_path_json)
+    # catalog.json.sig
+    file_path_json_sig = os.path.join(path, "catalog.json.sig")
+    if os.path.exists(file_path_json_sig):
+        module_logger.warning("%s already exists. Deleting it", file_path_json_sig)
+        os.remove(file_path_json_sig)
+
+
+def create_catalog_files(path: Union[str, os.PathLike], json_stub: dict) -> None:
+    """Write ``json_stub`` to both "catalog.json" and "catalog.json.sig" under ``path``."""
+    with open(os.path.join(path, "catalog.json"), "w", encoding=DefaultOpenEncoding.WRITE) as json_file:
+        json.dump(json_stub, json_file)
+    with open(os.path.join(path, "catalog.json.sig"), "w", encoding=DefaultOpenEncoding.WRITE) as json_file:
+        json.dump(json_stub, json_file)
+
+
 def _get_dir_hash(directory: Union[str, os.PathLike], _hash: hash_type, ignore_file: IgnoreFile) -> hash_type:
     dir_contents = Path(directory).iterdir()
     sorted_contents = sorted(dir_contents, key=lambda path: str(path).lower())
@@ -349,7 +392,10 @@ def get_content_hash(path: Union[str, os.PathLike], ignore_file: IgnoreFile = Ig
 
 
 def get_upload_files_from_folder(
-    path: Union[str, os.PathLike], *, prefix: str = "", ignore_file: IgnoreFile = IgnoreFile()
+    path: Union[str, os.PathLike],
+    *,
+    prefix: str = "",
+    ignore_file: IgnoreFile = IgnoreFile(),
 ) -> List[str]:
     path = Path(path)
     upload_paths = []
@@ -432,7 +478,12 @@ def traverse_directory(  # pylint: disable=unused-argument
     result = []
     for origin_file_path in origin_file_paths:
         relative_path = origin_file_path.relative_to(root)
-        result.append((_resolve_path(origin_file_path).as_posix(), Path(prefix).joinpath(relative_path).as_posix()))
+        result.append(
+            (
+                _resolve_path(origin_file_path).as_posix(),
+                Path(prefix).joinpath(relative_path).as_posix(),
+            )
+        )
     return result
 
 
diff --git a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py
index bcd4fa4fa99d..79530a092fc1 100644
--- a/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py
+++ b/sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py
@@ -4,31 +4,48 @@
 # pylint: disable=protected-access,too-many-lines
 
+import collections
+import hashlib
 import time
 import types
 from functools import partial
 from inspect import Parameter, signature
 from os import PathLike
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
 
 from azure.ai.ml._restclient.v2021_10_01_dataplanepreview import (
     AzureMachineLearningWorkspaces as ServiceClient102021Dataplane,
 )
-from azure.ai.ml._restclient.v2024_01_01_preview import AzureMachineLearningWorkspaces as ServiceClient012024
-from azure.ai.ml._restclient.v2024_01_01_preview.models import ComponentVersion, ListViewType
+from azure.ai.ml._restclient.v2024_01_01_preview import (
+    AzureMachineLearningWorkspaces as ServiceClient012024,
+)
+from azure.ai.ml._restclient.v2024_01_01_preview.models import (
+    ComponentVersion,
+    ListViewType,
+)
 from azure.ai.ml._scope_dependent_operations import (
     OperationConfig,
     OperationsContainer,
     OperationScope,
     _ScopeDependentOperations,
 )
-from azure.ai.ml._telemetry import ActivityType, monitor_with_activity, monitor_with_telemetry_mixin
+from azure.ai.ml._telemetry import (
+    ActivityType,
+    monitor_with_activity,
+    monitor_with_telemetry_mixin,
+)
 from azure.ai.ml._utils._asset_utils import (
+    IgnoreFile,
     _archive_or_restore,
     _create_or_update_autoincrement,
+    _get_file_hash,
     _get_latest,
     _get_next_version_from_container,
     _resolve_label_to_asset,
+    create_catalog_files,
+    delete_two_catalog_files,
+    get_ignore_file,
+    get_upload_files_from_folder,
 )
 from azure.ai.ml._utils._azureml_polling import AzureMLPolling
 from azure.ai.ml._utils._endpoint_utils import polling_wait
@@ -42,7 +59,12 @@
     LROConfigurations,
 )
 from azure.ai.ml.entities import Component, ValidationResult
-from azure.ai.ml.exceptions import ComponentException, ErrorCategory, ErrorTarget, ValidationException
+from azure.ai.ml.exceptions import (
+    ComponentException,
+    ErrorCategory,
+    ErrorTarget,
+    ValidationException,
+)
 from azure.core.exceptions import HttpResponseError, ResourceNotFoundError
 
 from .._utils._cache_utils import CachedNodeResolver
@@ -282,7 +304,8 @@ def _localize_code(self, component: Component, base_dir: Path) -> None:
 
         target_code_value = "./code"
         self._code_operations.download(
-            **extract_name_and_version(code), download_path=base_dir.joinpath(target_code_value)
+            **extract_name_and_version(code),
+            download_path=base_dir.joinpath(target_code_value),
         )
 
         setattr(component, component._get_code_field_name(), target_code_value)
@@ -311,7 +334,13 @@ def _localize_environment(self, component: Component, base_dir: Path) -> None:
 
     @experimental
     @monitor_with_telemetry_mixin(ops_logger, "Component.Download", ActivityType.PUBLICAPI)
-    def download(self, name: str, download_path: Union[PathLike, str] = ".", *, version: Optional[str] = None) -> None:
+    def download(
+        self,
+        name: str,
+        download_path: Union[PathLike, str] = ".",
+        *,
+        version: Optional[str] = None,
+    ) -> None:
         """Download the specified component and its dependencies to local. Local component can be used to create
         the component in another workspace or for offline development.
 
@@ -491,7 +520,11 @@ def _reset_version_if_no_change(self, component: Component, current_name: str, c
         return current_version, rest_component_resource
 
     def _create_or_update_component_version(
-        self, component: Component, name: str, version: Optional[str], rest_component_resource: Any
+        self,
+        component: Component,
+        name: str,
+        version: Optional[str],
+        rest_component_resource: Any,
     ) -> Any:
         try:
             if self._registry_name:
@@ -652,7 +685,33 @@ def create_or_update(
         )
 
         return component
 
+    @experimental
+    def prepare_for_sign(self, component: Component) -> None:
+        """Create "catalog.json" and "catalog.json.sig" stubs for the component's code folder.
+
+        The catalog maps every file in the folder (by relative path) to the uppercase
+        hexadecimal SHA256 digest of its content, so that the folder can then be signed.
+
+        :param component: The component whose code folder is prepared for signing.
+        :type component: Component
+        """
+        if isinstance(component, ComponentCodeMixin):
+            with component._build_code() as code:
+                # Remove any stale catalog files so they are not hashed into the new catalog.
+                delete_two_catalog_files(code.path)
+                # Prefer the ignore file attached to the code; otherwise discover one in the folder.
+                ignore_file = code._ignore_file if code._ignore_file is not None else get_ignore_file(code.path)
+                file_list = get_upload_files_from_folder(code.path, ignore_file=ignore_file)
+                json_stub: Dict[str, Any] = {"HashAlgorithm": "SHA256", "CatalogItems": {}}
+
+                for file_path, file_name in sorted(file_list, key=lambda x: str(x[1]).lower()):
+                    file_hash = _get_file_hash(file_path, hashlib.sha256()).hexdigest().upper()
+                    json_stub["CatalogItems"][file_name] = file_hash
+
+                json_stub["CatalogItems"] = collections.OrderedDict(sorted(json_stub["CatalogItems"].items()))
+                create_catalog_files(code.path, json_stub)
+
     @monitor_with_telemetry_mixin(ops_logger, "Component.Archive", ActivityType.PUBLICAPI)
     def archive(
         self,
@@ -860,7 +919,9 @@ def _resolve_binding_on_supported_fields_for_node(cls, node: BaseNode) -> None:
         :param node: The node
         :type node: BaseNode
         """
-        from azure.ai.ml.entities._job.pipeline._attr_dict import try_get_non_arbitrary_attr
+        from azure.ai.ml.entities._job.pipeline._attr_dict import (
+            try_get_non_arbitrary_attr,
+        )
         from azure.ai.ml.entities._job.pipeline._io import PipelineInput
 
         # compute binding to pipeline input is supported on node.
@@ -968,7 +1029,9 @@ def _try_resolve_compute_for_node(cls, node: BaseNode, _: str, resolver: _AssetR
 
     @classmethod
     def _divide_nodes_to_resolve_into_layers(
-        cls, component: PipelineComponent, extra_operations: List[Callable[[BaseNode, str], Any]]
+        cls,
+        component: PipelineComponent,
+        extra_operations: List[Callable[[BaseNode, str], Any]],
     ) -> List:
         """Traverse the pipeline component and divide nodes to resolve into layers. Note that all leaf nodes will be
         put in the last layer.
@@ -1029,7 +1092,8 @@ def _divide_nodes_to_resolve_into_layers(
     def _get_workspace_key(self) -> str:
         try:
             workspace_rest = self._workspace_operations._operation.get(
-                resource_group_name=self._resource_group_name, workspace_name=self._workspace_name
+                resource_group_name=self._resource_group_name,
+                workspace_name=self._workspace_name,
             )
             return str(workspace_rest.workspace_id)
         except HttpResponseError:
@@ -1099,7 +1163,10 @@ def _resolve_dependencies_for_pipeline_component_jobs(
             extra_operations=[
                 # no need to do this as we now keep the original component name for anonymous components
                 # self._set_default_display_name_for_anonymous_component_in_node,
-                partial(self._try_resolve_node_level_task_for_parallel_node, resolver=resolver),
+                partial(
+                    self._try_resolve_node_level_task_for_parallel_node,
+                    resolver=resolver,
+                ),
                 partial(self._try_resolve_environment_for_component, resolver=resolver),
                 partial(self._try_resolve_compute_for_node, resolver=resolver),
                 # should we resolve code here after we do extra operations concurrently?
diff --git a/sdk/ml/azure-ai-ml/samples/hello.py b/sdk/ml/azure-ai-ml/samples/hello.py
new file mode 100644
index 000000000000..85011c4fe0bc
--- /dev/null
+++ b/sdk/ml/azure-ai-ml/samples/hello.py
@@ -0,0 +1,29 @@
+import argparse
+import os
+from datetime import datetime
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--componentB_input", type=str)
+parser.add_argument("--componentB_output", type=str)
+
+print("Hello Python World...\nI'm componentB :-)")
+
+args = parser.parse_args()
+
+print("componentB_input path: %s" % args.componentB_input)
+print("componentB_output path: %s" % args.componentB_output)
+
+print("files in input path:")
+arr = os.listdir(args.componentB_input)
+print(arr)
+
+for filename in arr:
+    print("reading file: %s ..." % filename)
+    with open(os.path.join(args.componentB_input, filename), "r", encoding="utf-8") as handle:
+        print(handle.read())
+
+cur_time_str = datetime.now().strftime("%b-%d-%Y-%H-%M-%S")
+
+print("Writing file: %s" % os.path.join(args.componentB_output, "file-" + cur_time_str + ".txt"))
+with open(os.path.join(args.componentB_output, "file-" + cur_time_str + ".txt"), "wt", encoding="utf-8") as text_file:
+    print(f"Logging date time: {cur_time_str}", file=text_file)
diff --git a/sdk/ml/azure-ai-ml/samples/job.yaml b/sdk/ml/azure-ai-ml/samples/job.yaml
new file mode 100644
index 000000000000..b8fb5ea3c8a7
--- /dev/null
+++ b/sdk/ml/azure-ai-ml/samples/job.yaml
@@ -0,0 +1,25 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
+code: ../src
+command: >-
+  python main.py train_check
+  --config ${{inputs.data}}/model.yaml
+  --train ${{inputs.data}}/train.csv
+  --sanity-check ${{inputs.data}}/sanity_check.csv
+  --min-accuracy 0.99 --min-precision 0.95 --min-recall 0.95
+  --model-dir ${{outputs.model}}
+inputs:
+  data:
+    path: .
+    mode: download
+outputs:
+  model:
+    type: uri_folder
+environment:
+  image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04
+  conda_file: ../src/environment.yml
+environment_variables:
+  AZUREML_COMMON_RUNTIME_USE_SBOM_CAPABILITY: "true"
+compute: azureml:gpu-t4-spot-vpn
+display_name: Compete
+experiment_name: sensei-compete
+description: Sensei Compete Model
\ No newline at end of file
diff --git a/sdk/ml/azure-ai-ml/samples/ml_samples_test_prepForSign.py b/sdk/ml/azure-ai-ml/samples/ml_samples_test_prepForSign.py
new file mode 100644
index 000000000000..3aef29d15cf6
--- /dev/null
+++ b/sdk/ml/azure-ai-ml/samples/ml_samples_test_prepForSign.py
@@ -0,0 +1,20 @@
+from azure.ai.ml import MLClient
+from azure.ai.ml.entities._load_functions import load_component
+from azure.identity import DefaultAzureCredential
+
+subscription_id = "2d385bf4-0756-4a76-aa95-28bf9ed3b625"
+resource_group = "sdkv2-20240925-rg"
+workspace_name = "sdkv2-20240925-ws"
+
+credential = DefaultAzureCredential()
+ml_client = MLClient(
+    credential=credential,
+    subscription_id=subscription_id,
+    resource_group_name=resource_group,
+    workspace_name=workspace_name,
+)
+
+component = load_component(
+    "C:\\Projects\\azure-sdk-for-python\\sdk\\ml\\azure-ai-ml\\azure\\ai\\ml\\YAMLsigning\\sum1.yaml"
+)
+ml_client.components.prepare_for_sign(component)
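Reviewer note: the snippet below is a minimal, standalone sketch and not part of this change; it illustrates the catalog format that prepare_for_sign writes and how it could be checked. After the call, the component's code folder contains a catalog.json (plus an identical catalog.json.sig stub) of the shape {"HashAlgorithm": "SHA256", "CatalogItems": {<relative file path>: <uppercase SHA256 hex digest>}}. The code_path value is a hypothetical example folder; files are read whole here for brevity, which yields the same digest as the chunked reads in _get_file_hash.

import hashlib
import json
from pathlib import Path

code_path = Path("./code")  # hypothetical folder already processed by prepare_for_sign

catalog = json.loads((code_path / "catalog.json").read_text(encoding="utf-8"))
assert catalog["HashAlgorithm"] == "SHA256"

# The catalog files themselves are deleted before hashing, so they never list themselves.
for rel_path, expected in catalog["CatalogItems"].items():
    actual = hashlib.sha256((code_path / rel_path).read_bytes()).hexdigest().upper()
    print(f"{rel_path}: {'OK' if actual == expected else 'MISMATCH'}")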