diff --git a/doc/tool_usage_guide.md b/doc/tool_usage_guide.md
index b8da8312ae7a..1b0df55663c2 100644
--- a/doc/tool_usage_guide.md
+++ b/doc/tool_usage_guide.md
@@ -22,6 +22,8 @@ This repo is currently migrating all checks from a slower `tox`-based framework,
 |`black`| Runs `black` checks. | `azpysdk black .` |
 |`verifytypes`| Runs `verifytypes` checks. | `azpysdk verifytypes .` |
 |`ruff`| Runs `ruff` checks. | `azpysdk ruff .` |
+|`verifywhl`| Verifies that the root directory in the whl is `azure` and that the package metadata is compatible with the prior released version. | `azpysdk verifywhl .` |
+|`verifysdist`| Verifies that all source directories are included in the sdist and that the manifest file is complete. Also ensures that the py.typed configuration in setup.py is correct. | `azpysdk verifysdist .` |
 |`import_all`| Installs the package w/ default dependencies, then attempts to `import *` from the base namespace. Ensures that all imports will resolve after a base install and import. | `azpysdk import_all .` |
 
 ## Common arguments
diff --git a/eng/tools/azure-sdk-tools/azpysdk/main.py b/eng/tools/azure-sdk-tools/azpysdk/main.py
index e302589fdd11..cc107fd4f183 100644
--- a/eng/tools/azure-sdk-tools/azpysdk/main.py
+++ b/eng/tools/azure-sdk-tools/azpysdk/main.py
@@ -25,6 +25,8 @@
 from .next_pyright import next_pyright
 from .ruff import ruff
 from .verifytypes import verifytypes
+from .verify_whl import verify_whl
+from .verify_sdist import verify_sdist
 
 from ci_tools.logging import configure_logging, logger
 
@@ -81,6 +83,8 @@ def build_parser() -> argparse.ArgumentParser:
     next_pyright().register(subparsers, [common])
     ruff().register(subparsers, [common])
     verifytypes().register(subparsers, [common])
+    verify_sdist().register(subparsers, [common])
+    verify_whl().register(subparsers, [common])
 
     return parser
diff --git a/eng/tools/azure-sdk-tools/azpysdk/verify_sdist.py b/eng/tools/azure-sdk-tools/azpysdk/verify_sdist.py
new file mode 100644
index 000000000000..92eaadd2b25a
--- /dev/null
+++ b/eng/tools/azure-sdk-tools/azpysdk/verify_sdist.py
@@ -0,0 +1,211 @@
+import argparse
+import os
+import sys
+from typing import List, Mapping, Any, Dict, Optional
+
+from ci_tools.parsing import ParsedSetup, extract_package_metadata
+from ci_tools.functions import verify_package_classifiers
+from .verify_whl import (
+    cleanup,
+    should_verify_package,
+    get_prior_version,
+    verify_prior_version_metadata,
+    get_path_to_zip,
+    unzip_file_to_directory,
+)
+from ci_tools.scenario.generation import create_package_and_install
+from .Check import Check
+from ci_tools.variables import set_envvar_defaults
+from ci_tools.logging import logger
+
+ALLOWED_ROOT_DIRECTORIES = ["azure", "tests", "samples", "examples"]
+
+EXCLUDED_PYTYPE_PACKAGES = ["azure-keyvault", "azure", "azure-common"]
+
+EXCLUDED_CLASSIFICATION_PACKAGES: List[str] = []
+
+
+def get_root_directories_in_source(package_dir: str) -> List[str]:
+    """
+    Find all allowed directories in the source path.
+    """
+    source_folders = [
+        d
+        for d in os.listdir(package_dir)
+        if os.path.isdir(os.path.join(package_dir, d)) and d in ALLOWED_ROOT_DIRECTORIES
+    ]
+    return source_folders
+
+
+def get_root_directories_in_sdist(dist_dir: str, version: str) -> List[str]:
+    """
+    Given an unzipped sdist directory, extract which directories are present.
+    """
+    # find sdist zip file
+    path_to_zip = get_path_to_zip(dist_dir, version, package_type="*.tar.gz")
+    # extract sdist and find list of directories in sdist
+    extract_location = os.path.join(dist_dir, "unzipped")
+    # Cleanup any files in unzipped
+    cleanup(extract_location)
+    unzipped_dir = unzip_file_to_directory(path_to_zip, extract_location)
+    sdist_folders = [d for d in os.listdir(unzipped_dir) if os.path.isdir(os.path.join(unzipped_dir, d))]
+    return sdist_folders
+
+
+def verify_sdist_helper(package_dir: str, dist_dir: str, parsed_pkg: ParsedSetup, executable: str) -> bool:
+    """
+    Compares the root directories in source against the root directories present within an sdist.
+    Also verifies metadata compatibility with the prior stable version.
+    """
+    version = parsed_pkg.version
+    # Extract metadata from the zip file to ensure we're checking the built package metadata
+    metadata: Dict[str, Any] = extract_package_metadata(get_path_to_zip(dist_dir, version, package_type="*.tar.gz"))
+
+    source_folders = get_root_directories_in_source(package_dir)
+    sdist_folders = get_root_directories_in_sdist(dist_dir, version)
+
+    # compare folders in source directory against unzipped sdist
+    missing_folders = set(source_folders) - set(sdist_folders)
+    for folder in missing_folders:
+        logger.error("Source folder [%s] is not included in sdist", folder)
+
+    if missing_folders:
+        logger.info("Directories in source: %s", source_folders)
+        logger.info("Directories in sdist: %s", sdist_folders)
+        return False
+
+    # Verify metadata compatibility with the prior version
+    prior_version = get_prior_version(parsed_pkg.name, version)
+    if prior_version:
+        if not verify_prior_version_metadata(
+            parsed_pkg.name, prior_version, metadata, package_type="*.tar.gz", executable=executable
+        ):
+            return False
+
+    return True
+
+
+def verify_sdist_pytyped(
+    pkg_dir: str, namespace: str, package_metadata: Mapping[str, Any], include_package_data: bool
+) -> bool:
+    """
+    Takes a package directory and ensures that the setup.py within is correctly configured for py.typed files.
+    """
+    result = True
+    manifest_location = os.path.join(pkg_dir, "MANIFEST.in")
+
+    if not include_package_data:
+        logger.info(
+            "Ensure that the setup.py present in directory %s has kwarg 'include_package_data' defined and set to 'True'.",
+            pkg_dir,
+        )
+        result = False
+
+    if package_metadata:
+        if not any([key for key in package_metadata if "py.typed" in str(package_metadata[key])]):
+            logger.info(
+                "At least one value in the package_metadata map should include a reference to the py.typed file."
+            )
+            result = False
+
+    if os.path.exists(manifest_location):
+        with open(manifest_location, "r") as f:
+            lines = f.readlines()
+            if not any([include for include in lines if "py.typed" in include]):
+                logger.info("Ensure that the MANIFEST.in includes at least one path that leads to a py.typed file.")
+                result = False
+
+    pytyped_file_path = os.path.join(pkg_dir, *namespace.split("."), "py.typed")
+    if not os.path.exists(pytyped_file_path):
+        logger.info(
+            "The py.typed file must exist in the base namespace for your package. Traditionally this would mean the furthest depth, EG 'azure/storage/blob/py.typed'."
+        )
+        result = False
+
+    return result
+
+
+class verify_sdist(Check):
+    def __init__(self) -> None:
+        super().__init__()
+
+    def register(
+        self, subparsers: "argparse._SubParsersAction", parent_parsers: Optional[List[argparse.ArgumentParser]] = None
+    ) -> None:
+        """Register the verifysdist check. Verifies the directories included in the sdist and the contents of the
+        manifest file; also ensures that the py.typed configuration in setup.py is correct."""
+        parents = parent_parsers or []
+        p = subparsers.add_parser(
+            "verifysdist",
+            parents=parents,
+            help="Verify directories included in the sdist and the contents of the manifest file. Also ensures that the py.typed configuration in setup.py is correct.",
+        )
+        p.set_defaults(func=self.run)
+
+    def run(self, args: argparse.Namespace) -> int:
+        """Run the verifysdist check command."""
+        logger.info("Running verifysdist check...")
+
+        set_envvar_defaults()
+        targeted = self.get_targeted_directories(args)
+
+        results: List[int] = []
+
+        for parsed in targeted:
+            package_dir = parsed.folder
+            package_name = parsed.name
+            executable, staging_directory = self.get_executable(args.isolate, args.command, sys.executable, package_dir)
+            logger.info(f"Processing {package_name} for verify_sdist check")
+
+            self.install_dev_reqs(executable, args, package_dir)
+
+            create_package_and_install(
+                distribution_directory=staging_directory,
+                target_setup=package_dir,
+                skip_install=False,
+                cache_dir=None,
+                work_dir=staging_directory,
+                force_create=False,
+                package_type="sdist",
+                pre_download_disabled=False,
+                python_executable=executable,
+            )
+
+            error_occurred = False
+
+            if should_verify_package(package_name):
+                logger.info(f"Verifying sdist folders and metadata for package {package_name}")
+                if verify_sdist_helper(package_dir, staging_directory, parsed, executable):
+                    logger.info(f"Verified sdist folders and metadata for package {package_name}")
+                else:
+                    logger.error(f"Failed to verify sdist folders and metadata for package {package_name}")
+                    error_occurred = True
+
+            if (
+                package_name not in EXCLUDED_PYTYPE_PACKAGES
+                and "-nspkg" not in package_name
+                and "-mgmt" not in package_name
+            ):
+                logger.info(f"Verifying presence of py.typed: {package_name}")
+                if verify_sdist_pytyped(
+                    package_dir, parsed.namespace, parsed.package_data, parsed.include_package_data
+                ):
+                    logger.info(f"Py.typed setup.py kwargs are set properly: {package_name}")
+                else:
+                    logger.error(f"Py.typed verification failed for package {package_name}. Check messages above.")
+                    error_occurred = True
+
+            if package_name not in EXCLUDED_CLASSIFICATION_PACKAGES and "-nspkg" not in package_name:
+                logger.info(f"Verifying package classifiers: {package_name}")
+
+                status, message = verify_package_classifiers(package_name, parsed.version, parsed.classifiers)
+                if status:
+                    logger.info(f"Package classifiers are set properly: {package_name}")
+                else:
+                    logger.error(message)
+                    error_occurred = True
+
+            if error_occurred:
+                logger.error(f"{package_name} failed sdist verification. Check outputs above.")
+                results.append(1)
+
+        return max(results) if results else 0
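
Reviewer note, not part of the diff: a minimal sketch of a package layout that would satisfy `verify_sdist_pytyped` above. The package name and namespace here are illustrative assumptions, not taken from this change.

```python
# setup.py (excerpt) -- the kwargs that verify_sdist_pytyped inspects:
from setuptools import setup, find_packages

setup(
    name="azure-storage-blob",                          # hypothetical package
    packages=find_packages(exclude=["tests*", "samples*"]),
    include_package_data=True,                          # must be defined and True
    package_data={"azure.storage.blob": ["py.typed"]},  # some value must reference py.typed
)

# The check additionally requires that MANIFEST.in contain a line referencing a
# py.typed path, and that azure/storage/blob/py.typed exist at the deepest
# namespace level.
```
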
Check outputs above.") + results.append(1) + + return max(results) if results else 0 diff --git a/eng/tools/azure-sdk-tools/azpysdk/verify_whl.py b/eng/tools/azure-sdk-tools/azpysdk/verify_whl.py new file mode 100644 index 000000000000..6968e9e78b74 --- /dev/null +++ b/eng/tools/azure-sdk-tools/azpysdk/verify_whl.py @@ -0,0 +1,258 @@ +import argparse +import logging +import os +import sys +import glob +import shutil +import tempfile +import zipfile +import tarfile +import subprocess +from packaging.version import Version +from typing import Dict, Any, Optional, List + +from .Check import Check +from ci_tools.functions import get_pip_command +from ci_tools.scenario.generation import create_package_and_install +from ci_tools.parsing import ParsedSetup, extract_package_metadata +from pypi_tools.pypi import retrieve_versions_from_pypi +from ci_tools.variables import set_envvar_defaults +from ci_tools.logging import logger + +# Excluding auto generated applicationinsights and loganalytics +EXCLUDED_PACKAGES = [ + "azure", + "azure-mgmt", + "azure-common", + "azure-applicationinsights", + "azure-loganalytics", +] + + +def unzip_file_to_directory(path_to_zip_file: str, extract_location: str) -> str: + if path_to_zip_file.endswith(".zip"): + with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref: + zip_ref.extractall(extract_location) + extracted_dir = os.path.basename(os.path.splitext(path_to_zip_file)[0]) + return os.path.join(extract_location, extracted_dir) + else: + with tarfile.open(path_to_zip_file) as tar_ref: + tar_ref.extractall(extract_location) + extracted_dir = os.path.basename(path_to_zip_file).replace(".tar.gz", "") + return os.path.join(extract_location, extracted_dir) + + +def extract_whl(dist_dir, version): + # Find whl for the package + path_to_whl = glob.glob(os.path.join(dist_dir, "*{}*.whl".format(version)))[0] + + # Cleanup any existing stale files if any and rename whl file to zip for extraction later + zip_file = path_to_whl.replace(".whl", ".zip") + cleanup(zip_file) + os.rename(path_to_whl, zip_file) + + # Extract renamed gz file to unzipped folder + extract_location = os.path.join(dist_dir, "unzipped") + cleanup(extract_location) + unzip_file_to_directory(zip_file, extract_location) + return extract_location + + +def verify_whl_root_directory( + dist_dir: str, expected_top_level_module: str, parsed_pkg: ParsedSetup, executable: str +) -> bool: + # Verify metadata compatibility with prior version + version: str = parsed_pkg.version + metadata: Dict[str, Any] = extract_package_metadata(get_path_to_zip(dist_dir, version)) + prior_version = get_prior_version(parsed_pkg.name, version) + if prior_version: + if not verify_prior_version_metadata(parsed_pkg.name, prior_version, metadata, executable): + return False + + # This method ensures root directory in whl is the directory indicated by our top level namespace + extract_location = extract_whl(dist_dir, version) + root_folders = os.listdir(extract_location) + + # check for non 'azure' folder as root folder + non_azure_folders = [d for d in root_folders if d != expected_top_level_module and not d.endswith(".dist-info")] + + if non_azure_folders: + logging.error( + "whl has following incorrect directory at root level [%s]", + non_azure_folders, + ) + return False + else: + return True + + +def cleanup(path): + # This function deletes all files and cleanup the directory if it exists + if os.path.exists(path): + if os.path.isdir(path): + shutil.rmtree(path) + else: + os.remove(path) + + +def should_verify_package(package_name): 
+ return package_name not in EXCLUDED_PACKAGES and "nspkg" not in package_name and "-mgmt" not in package_name + + +def get_prior_version(package_name: str, current_version: str) -> Optional[str]: + """Get prior stable version if it exists, otherwise get prior preview version, else return None.""" + try: + all_versions = retrieve_versions_from_pypi(package_name) + current_ver = Version(current_version) + prior_versions = [Version(v) for v in all_versions if Version(v) < current_ver] + if not prior_versions: + return None + + # Try stable versions first + stable_versions = [v for v in prior_versions if not v.is_prerelease] + if stable_versions: + return str(max(stable_versions)) + + # Fall back to preview versions + preview_versions = [v for v in prior_versions if v.is_prerelease] + return str(max(preview_versions)) if preview_versions else None + except Exception: + return None + + +def verify_prior_version_metadata( + package_name: str, + prior_version: str, + current_metadata: Dict[str, Any], + executable: str, + package_type: str = "*.whl", +) -> bool: + """Download prior version and verify metadata compatibility.""" + cmd = get_pip_command(executable) + + with tempfile.TemporaryDirectory() as tmp_dir: + try: + is_binary = "--only-binary=:all:" if package_type == "*.whl" else "--no-binary=:all:" + + # pip download is not supported by uv + if cmd[0] == "uv": + cmd += ["install", "--target", tmp_dir, "--no-deps", is_binary, f"{package_name}=={prior_version}"] + else: + cmd += ["download", "--no-deps", is_binary, f"{package_name}=={prior_version}", "--dest", tmp_dir] + + subprocess.run( + cmd, + check=True, + capture_output=True, + ) + + if cmd[0] == "uv": + package_path = glob.glob(os.path.join(tmp_dir, package_name.replace("-", "_") + "-*"))[0] + if not package_path: + return True + prior_metadata: Dict[str, Any] = extract_package_metadata(package_path) + else: + zip_files = glob.glob(os.path.join(tmp_dir, package_type)) + # If no match and we're not constrained to wheel-only, attempt legacy sdist (zip) once. + if not zip_files and package_type != "*.whl": + zip_files = glob.glob(os.path.join(tmp_dir, "*.zip")) + if not zip_files: # Still nothing -> treat as no prior artifact to compare. + return True + prior_metadata: Dict[str, Any] = extract_package_metadata(zip_files[0]) + + is_compatible = verify_metadata_compatibility(current_metadata, prior_metadata) + + return is_compatible + except Exception: + return True + + +def verify_metadata_compatibility(current_metadata: Dict[str, Any], prior_metadata: Dict[str, Any]) -> bool: + """Verify that all keys from prior version metadata are present in current version. + + Special handling: homepage/repository keys are exempt from prior compatibility check, + but current version must have at least one of them. 
+ """ + if not current_metadata: + return False + # Check that current version has at least one homepage or repository URL + repo_urls = ["homepage", "repository"] + current_keys_lower = {k.lower() for k in current_metadata.keys()} + if not any(key in current_keys_lower for key in repo_urls): + logging.error(f"Current metadata must contain at least one of: {repo_urls}") + return False + + if not prior_metadata: + return True + + # For backward compatibility check, exclude homepage/repository from prior requirements + prior_keys_filtered = {k for k in prior_metadata.keys() if k.lower() not in repo_urls} + current_keys = set(current_metadata.keys()) + + is_compatible = prior_keys_filtered.issubset(current_keys) + if not is_compatible: + missing_keys = prior_keys_filtered - current_keys + logging.error("Metadata compatibility failed. Missing keys: %s", missing_keys) + return is_compatible + + +def get_path_to_zip(dist_dir: str, version: str, package_type: str = "*.whl") -> str: + return glob.glob(os.path.join(dist_dir, "**", "*{}{}".format(version, package_type)), recursive=True)[0] + + +class verify_whl(Check): + def __init__(self) -> None: + super().__init__() + + def register( + self, subparsers: "argparse._SubParsersAction", parent_parsers: Optional[List[argparse.ArgumentParser]] = None + ) -> None: + """Register the verify_whl check. The verify_whl check verifies that the root directory in whl is azure, and verifies manifest so that all directories in source are included in sdist.""" + parents = parent_parsers or [] + p = subparsers.add_parser( + "verifywhl", + parents=parents, + help="Verify directories included in whl, contents in manifest file, and metadata compatibility", + ) + p.set_defaults(func=self.run) + + def run(self, args: argparse.Namespace) -> int: + """Run the verify_whl check command.""" + logger.info("Running verify_whl check...") + + set_envvar_defaults() + targeted = self.get_targeted_directories(args) + + results: List[int] = [] + + for parsed in targeted: + package_dir = parsed.folder + package_name = parsed.name + executable, staging_directory = self.get_executable(args.isolate, args.command, sys.executable, package_dir) + logger.info(f"Processing {package_name} for verify_whl check") + + top_level_module = parsed.namespace.split(".")[0] + + self.install_dev_reqs(executable, args, package_dir) + + create_package_and_install( + distribution_directory=staging_directory, + target_setup=package_dir, + skip_install=False, + cache_dir=None, + work_dir=staging_directory, + force_create=False, + package_type="wheel", + pre_download_disabled=False, + python_executable=executable, + ) + + if should_verify_package(package_name): + logger.info(f"Verifying whl for package: {package_name}") + if verify_whl_root_directory(staging_directory, top_level_module, parsed, executable): + logger.info(f"Verified whl for package {package_name}") + else: + logger.error(f"Failed to verify whl for package {package_name}") + results.append(1) + + return max(results) if results else 0