Merged

25 commits
648d0a9
Bug 3323988: Regex fix and indices correction for model download
kshitij-microsoft Jul 2, 2024
c21f605
Merge branch 'main' of github.com:Azure/azure-sdk-for-python
kshitij-microsoft Jul 4, 2024
331d7e3
Merge branch 'main' of github.com:Azure/azure-sdk-for-python
kshitij-microsoft Jul 5, 2024
f125053
fixing test case
kshitij-microsoft Jul 17, 2024
a5aa64a
Merge branch 'main' of github.com:Azure/azure-sdk-for-python
kshitij-microsoft Jul 23, 2024
921e805
adding mlflow tracking uri func
kshitij-microsoft Jul 23, 2024
d4785cd
Merge branch 'main' of github.com:Azure/azure-sdk-for-python
kshitij-microsoft Jul 31, 2024
e647204
passing service context to azureml mlflow
kshitij-microsoft Aug 2, 2024
2ed6544
Merge branch 'main' of github.com:Azure/azure-sdk-for-python
kshitij-microsoft Aug 28, 2024
a03c7a3
final flow APC complete
kshitij-microsoft Sep 5, 2024
357698f
Merge branch 'main' of github.com:Azure/azure-sdk-for-python
kshitij-microsoft Sep 6, 2024
d688c3c
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python
kshitij-microsoft Sep 24, 2024
3dbb2d2
modify host_url
kshitij-microsoft Sep 25, 2024
58a96dc
fixing unit test cases
kshitij-microsoft Sep 27, 2024
ae32d9d
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python
kshitij-microsoft Sep 30, 2024
0ab7bc4
changing mock for urlparse
kshitij-microsoft Oct 1, 2024
5a90da5
fixing the log msg
kshitij-microsoft Oct 24, 2024
c5f1343
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python
kshitij-microsoft Oct 25, 2024
f24849d
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python
kshitij-microsoft Nov 13, 2024
163c453
Merge branch 'main' of https://github.com/Azure/azure-sdk-for-python
kshitij-microsoft Dec 12, 2024
dc9cba1
first draft : YAML signing
kshitij-microsoft Dec 12, 2024
a1d670c
YAML signing
kshitij-microsoft Dec 12, 2024
88b7d7d
component ops adding prepare for sign
kshitij-microsoft Dec 27, 2024
d69b9e5
resolving comments
kshitij-microsoft Dec 31, 2024
997a7ed
fixing pylint and black
kshitij-microsoft Jan 2, 2025
58 changes: 54 additions & 4 deletions sdk/ml/azure-ai-ml/azure/ai/ml/_utils/_asset_utils.py
@@ -7,6 +7,7 @@
import hashlib
import logging
import os
import json
import uuid
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -15,7 +16,17 @@
from os import PathLike
from pathlib import Path
from platform import system
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast
from typing import (
TYPE_CHECKING,
Any,
Dict,
Iterable,
List,
Optional,
Tuple,
Union,
cast,
)

from colorama import Fore
from tqdm import TqdmWarning, tqdm
@@ -56,7 +67,11 @@
from azure.ai.ml._restclient.v2023_04_01.models import PendingUploadRequestDto
from azure.ai.ml._utils._pathspec import GitWildMatchPattern, normalize_file
from azure.ai.ml._utils.utils import convert_windows_path_to_unix, retry, snake_to_camel
from azure.ai.ml.constants._common import MAX_AUTOINCREMENT_ATTEMPTS, DefaultOpenEncoding, OrderString
from azure.ai.ml.constants._common import (
MAX_AUTOINCREMENT_ATTEMPTS,
DefaultOpenEncoding,
OrderString,
)
from azure.ai.ml.entities._assets.asset import Asset
from azure.ai.ml.exceptions import (
AssetPathException,
@@ -247,6 +262,33 @@ def _get_file_hash(filename: Union[str, os.PathLike], _hash: hash_type) -> hash_
return _hash


def delete_two_catalog_files(path):
"""
Delete the "catalog.json" and "catalog.json.sig" files located at 'path', if they exist.

:param path: Path to the folder for signing
:type path: Union[Path, str]
:return: None
"""
# catalog.json
file_path_json = os.path.join(path, "catalog.json")
if os.path.exists(file_path_json):
module_logger.warning("%s already exists. Deleting it", file_path_json)
os.remove(file_path_json)
# catalog.json.sig
file_path_json_sig = os.path.join(path, "catalog.json.sig")
if os.path.exists(file_path_json_sig):
module_logger.warning("%s already exists. Deleting it", file_path_json_sig)
os.remove(file_path_json_sig)


def create_catalog_files(path, json_stub):
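"""Write `json_stub` as JSON to both "catalog.json" and "catalog.json.sig" under `path`."""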
with open(os.path.join(path, "catalog.json"), "w", encoding=DefaultOpenEncoding.WRITE) as jsonFile1:
json.dump(json_stub, jsonFile1)
with open(os.path.join(path, "catalog.json.sig"), "w", encoding=DefaultOpenEncoding.WRITE) as jsonFile2:
json.dump(json_stub, jsonFile2)


def _get_dir_hash(directory: Union[str, os.PathLike], _hash: hash_type, ignore_file: IgnoreFile) -> hash_type:
dir_contents = Path(directory).iterdir()
sorted_contents = sorted(dir_contents, key=lambda path: str(path).lower())
@@ -349,7 +391,10 @@ def get_content_hash(path: Union[str, os.PathLike], ignore_file: IgnoreFile = Ig


def get_upload_files_from_folder(
path: Union[str, os.PathLike], *, prefix: str = "", ignore_file: IgnoreFile = IgnoreFile()
path: Union[str, os.PathLike],
*,
prefix: str = "",
ignore_file: IgnoreFile = IgnoreFile(),
) -> List[str]:
path = Path(path)
upload_paths = []
@@ -432,7 +477,12 @@ def traverse_directory( # pylint: disable=unused-argument
result = []
for origin_file_path in origin_file_paths:
relative_path = origin_file_path.relative_to(root)
result.append((_resolve_path(origin_file_path).as_posix(), Path(prefix).joinpath(relative_path).as_posix()))
result.append(
(
_resolve_path(origin_file_path).as_posix(),
Path(prefix).joinpath(relative_path).as_posix(),
)
)
return result


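A minimal sketch of how the two new catalog helpers compose (the import path is the module this PR changes; the folder and hash value are hypothetical placeholders):

from azure.ai.ml._utils._asset_utils import create_catalog_files, delete_two_catalog_files

delete_two_catalog_files("./component_code")  # remove any stale catalog files first
create_catalog_files(
    "./component_code",
    {
        "HashAlgorithm": "SHA256",
        "CatalogItems": {
            "hello.py": "B94D27B9934D3E08A52E52D7DA7DABFAC484EFE37A5380EE9088F7ACE2EFCDE9",
        },
    },
)
# catalog.json and catalog.json.sig now both hold the stub; the .sig copy
# appears to be a placeholder that a later signing step would overwrite.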
sdk/ml/azure-ai-ml/azure/ai/ml/operations/_component_operations.py
@@ -4,31 +4,48 @@

# pylint: disable=protected-access,too-many-lines
import time
import collections
import types
from functools import partial
from inspect import Parameter, signature
from os import PathLike
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast
import hashlib

from azure.ai.ml._restclient.v2021_10_01_dataplanepreview import (
AzureMachineLearningWorkspaces as ServiceClient102021Dataplane,
)
from azure.ai.ml._restclient.v2024_01_01_preview import AzureMachineLearningWorkspaces as ServiceClient012024
from azure.ai.ml._restclient.v2024_01_01_preview.models import ComponentVersion, ListViewType
from azure.ai.ml._restclient.v2024_01_01_preview import (
AzureMachineLearningWorkspaces as ServiceClient012024,
)
from azure.ai.ml._restclient.v2024_01_01_preview.models import (
ComponentVersion,
ListViewType,
)
from azure.ai.ml._scope_dependent_operations import (
OperationConfig,
OperationsContainer,
OperationScope,
_ScopeDependentOperations,
)
from azure.ai.ml._telemetry import ActivityType, monitor_with_activity, monitor_with_telemetry_mixin
from azure.ai.ml._telemetry import (
ActivityType,
monitor_with_activity,
monitor_with_telemetry_mixin,
)
from azure.ai.ml._utils._asset_utils import (
_archive_or_restore,
_create_or_update_autoincrement,
_get_file_hash,
_get_latest,
_get_next_version_from_container,
_resolve_label_to_asset,
get_ignore_file,
get_upload_files_from_folder,
IgnoreFile,
delete_two_catalog_files,
create_catalog_files,
)
from azure.ai.ml._utils._azureml_polling import AzureMLPolling
from azure.ai.ml._utils._endpoint_utils import polling_wait
@@ -42,7 +59,12 @@
LROConfigurations,
)
from azure.ai.ml.entities import Component, ValidationResult
from azure.ai.ml.exceptions import ComponentException, ErrorCategory, ErrorTarget, ValidationException
from azure.ai.ml.exceptions import (
ComponentException,
ErrorCategory,
ErrorTarget,
ValidationException,
)
from azure.core.exceptions import HttpResponseError, ResourceNotFoundError

from .._utils._cache_utils import CachedNodeResolver
@@ -282,7 +304,8 @@ def _localize_code(self, component: Component, base_dir: Path) -> None:

target_code_value = "./code"
self._code_operations.download(
**extract_name_and_version(code), download_path=base_dir.joinpath(target_code_value)
**extract_name_and_version(code),
download_path=base_dir.joinpath(target_code_value),
)

setattr(component, component._get_code_field_name(), target_code_value)
@@ -311,7 +334,13 @@ def _localize_environment(self, component: Component, base_dir: Path) -> None:

@experimental
@monitor_with_telemetry_mixin(ops_logger, "Component.Download", ActivityType.PUBLICAPI)
def download(self, name: str, download_path: Union[PathLike, str] = ".", *, version: Optional[str] = None) -> None:
def download(
self,
name: str,
download_path: Union[PathLike, str] = ".",
*,
version: Optional[str] = None,
) -> None:
"""Download the specified component and its dependencies to local. Local component can be used to create
the component in another workspace or for offline development.

@@ -491,7 +520,11 @@ def _reset_version_if_no_change(self, component: Component, current_name: str, c
return current_version, rest_component_resource

def _create_or_update_component_version(
self, component: Component, name: str, version: Optional[str], rest_component_resource: Any
self,
component: Component,
name: str,
version: Optional[str],
rest_component_resource: Any,
) -> Any:
try:
if self._registry_name:
@@ -652,6 +685,28 @@ def create_or_update(
)
return component

@experimental
def prepare_for_sign(self, component: Component):
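"""Regenerate the signing catalog for a component's code folder.

Deletes any existing "catalog.json"/"catalog.json.sig", hashes every
non-ignored file under the code path with SHA-256, and writes the sorted
file-name-to-hash map to both catalog files.

:param component: The component whose code folder is prepared for signing.
:type component: Component
"""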
ignore_file = IgnoreFile()

if isinstance(component, ComponentCodeMixin):
with component._build_code() as code:
delete_two_catalog_files(code.path)
ignore_file = get_ignore_file(code.path) if code._ignore_file is None else ignore_file
file_list = get_upload_files_from_folder(code.path, ignore_file=ignore_file)
json_stub = {}
json_stub["HashAlgorithm"] = "SHA256"
json_stub["CatalogItems"] = {} # type: ignore

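# Hash files in case-insensitive name order, then re-sort the catalog
# entries (default string order) so catalog.json is deterministic.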
for file_path, file_name in sorted(file_list, key=lambda x: str(x[1]).lower()):
file_hash = _get_file_hash(file_path, hashlib.sha256()).hexdigest().upper()
json_stub["CatalogItems"][file_name] = file_hash # type: ignore

json_stub["CatalogItems"] = collections.OrderedDict( # type: ignore
sorted(json_stub["CatalogItems"].items()) # type: ignore
)
create_catalog_files(code.path, json_stub)

@monitor_with_telemetry_mixin(ops_logger, "Component.Archive", ActivityType.PUBLICAPI)
def archive(
self,
@@ -860,7 +915,9 @@ def _resolve_binding_on_supported_fields_for_node(cls, node: BaseNode) -> None:
:param node: The node
:type node: BaseNode
"""
from azure.ai.ml.entities._job.pipeline._attr_dict import try_get_non_arbitrary_attr
from azure.ai.ml.entities._job.pipeline._attr_dict import (
try_get_non_arbitrary_attr,
)
from azure.ai.ml.entities._job.pipeline._io import PipelineInput

# compute binding to pipeline input is supported on node.
@@ -968,7 +1025,9 @@ def _try_resolve_compute_for_node(cls, node: BaseNode, _: str, resolver: _AssetR

@classmethod
def _divide_nodes_to_resolve_into_layers(
cls, component: PipelineComponent, extra_operations: List[Callable[[BaseNode, str], Any]]
cls,
component: PipelineComponent,
extra_operations: List[Callable[[BaseNode, str], Any]],
) -> List:
"""Traverse the pipeline component and divide nodes to resolve into layers. Note that all leaf nodes will be
put in the last layer.
@@ -1029,7 +1088,8 @@ def _divide_nodes_to_resolve_into_layers(
def _get_workspace_key(self) -> str:
try:
workspace_rest = self._workspace_operations._operation.get(
resource_group_name=self._resource_group_name, workspace_name=self._workspace_name
resource_group_name=self._resource_group_name,
workspace_name=self._workspace_name,
)
return str(workspace_rest.workspace_id)
except HttpResponseError:
@@ -1099,7 +1159,10 @@ def _resolve_dependencies_for_pipeline_component_jobs(
extra_operations=[
# no need to do this as we now keep the original component name for anonymous components
# self._set_default_display_name_for_anonymous_component_in_node,
partial(self._try_resolve_node_level_task_for_parallel_node, resolver=resolver),
partial(
self._try_resolve_node_level_task_for_parallel_node,
resolver=resolver,
),
partial(self._try_resolve_environment_for_component, resolver=resolver),
partial(self._try_resolve_compute_for_node, resolver=resolver),
# should we resolve code here after we do extra operations concurrently?
29 changes: 29 additions & 0 deletions sdk/ml/azure-ai-ml/samples/hello.py
@@ -0,0 +1,29 @@
import argparse
import os
from datetime import datetime

parser = argparse.ArgumentParser()
parser.add_argument("--componentB_input", type=str)
parser.add_argument("--componentB_output", type=str)

print("Hello Python World...\nI'm componentB :-)")

args = parser.parse_args()

print("componentB_input path: %s" % args.componentB_input)
print("componentB_output path: %s" % args.componentB_output)

print("files in input path: ")
arr = os.listdir(args.componentB_input)
print(arr)

for filename in arr:
print("reading file: %s ..." % filename)
with open(os.path.join(args.componentB_input, filename), "r") as handle:
print(handle.read())

cur_time_str = datetime.now().strftime("%b-%d-%Y-%H-%M-%S")

print("Writing file: %s" % os.path.join(args.componentB_output, "file-" + cur_time_str + ".txt"))
with open(os.path.join(args.componentB_output, "file-" + cur_time_str + ".txt"), "wt") as text_file:
print(f"Logging date time: {cur_time_str}", file=text_file)
20 changes: 20 additions & 0 deletions sdk/ml/azure-ai-ml/samples/job.yaml
@@ -0,0 +1,20 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
code: ../src
command: >-
python main.py train_check --config ${{inputs.data}}/model.yaml --train ${{inputs.data}}/train.csv --sanity-check ${{inputs.data}}/sanity_check.csv --min-accuracy 0.99 --min-precision 0.95 --min-recall 0.95 --model-dir ${{outputs.model}}
inputs:
data:
path: .
mode: download
outputs:
model:
type: uri_folder
environment:
image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04
conda_file: ../src/environment.yml
environment_variables:
AZUREML_COMMON_RUNTIME_USE_SBOM_CAPABILITY: "true"
compute: azureml:gpu-t4-spot-vpn
display_name: Compete
experiment_name: sensei-compete
description: Sensei Compete Model
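A hedged sketch of how this job YAML might be submitted with the v2 SDK; the workspace identifiers are placeholders:

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient, load_job

ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<subscription-id>",
    resource_group_name="<resource-group>",
    workspace_name="<workspace-name>",
)
job = load_job("sdk/ml/azure-ai-ml/samples/job.yaml")  # the file shown above
returned_job = ml_client.jobs.create_or_update(job)
print(returned_job.studio_url)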
31 changes: 31 additions & 0 deletions sdk/ml/azure-ai-ml/samples/ml_samples_test_prepForSign.py
@@ -0,0 +1,31 @@
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml.entities._load_functions import load_component

subscription_id = "2d385bf4-0756-4a76-aa95-28bf9ed3b625"
resource_group = "sdkv2-20240925-rg"
workspace_name = "sdkv2-20240925-ws"


credential = DefaultAzureCredential()

print(credential)
ml_client = MLClient(
credential=credential,
subscription_id=subscription_id,
resource_group_name=resource_group,
workspace_name=workspace_name,
)

component = load_component(
"C:\\Projects\\azure-sdk-for-python\\sdk\\ml\\azure-ai-ml\\azure\\ai\\ml\\YAMLsigning\\sum1.yaml"
)
ml_client.components.prepare_for_sign(component)
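If the call succeeds, the folder that sum1.yaml's code section resolves to should afterwards contain both catalog files; a quick hedged check (the folder path is a placeholder):

import json
import pathlib

code_dir = pathlib.Path("<component-code-folder>")
catalog = json.loads((code_dir / "catalog.json").read_text())
print(catalog["HashAlgorithm"])  # expected: "SHA256"
print(len(catalog["CatalogItems"]), "files cataloged")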