From d22d916719eb7daff8455a01d216d65f81899a9e Mon Sep 17 00:00:00 2001
From: Andy Lugo
Date: Tue, 9 Sep 2025 20:04:19 +0000
Subject: [PATCH 001/693] [ROCm] Add specific compile options for CK SDPA
 (#161759)

Updates CK version and adds CK specific compilation options

Pull Request resolved: https://github.com/pytorch/pytorch/pull/161759
Approved by: https://github.com/jeffdaily
---
 aten/src/ATen/CMakeLists.txt                  | 89 ++++++++++++++++++-
 .../hip/flash_attn/ck/launch_kernel_pt.hpp    |  9 +-
 caffe2/CMakeLists.txt                         |  4 +
 third_party/composable_kernel                 |  2 +-
 4 files changed, 94 insertions(+), 10 deletions(-)

diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index 6c095680733fe..bbf79491e2d3d 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -1,6 +1,7 @@
 cmake_minimum_required(VERSION 3.27 FATAL_ERROR)
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})
+
 if(NOT MSVC)
   string(APPEND CMAKE_CXX_FLAGS " -Wno-ignored-qualifiers")
   string(APPEND CMAKE_C_FLAGS " -Wno-ignored-qualifiers")
@@ -195,14 +196,94 @@ if(USE_FLASH_ATTENTION)
     endif()
   endif()
   message(STATUS "USE_ROCM_CK_SDPA is set; building PyTorch with CK SDPA enabled")
+
+  # CK SDPA sources require specific compilation flags
+  set(CK_SDPA_EXTRA_HIPCC_FLAGS
+    -fno-autolink
+    -fhip-new-launch-api
+    -fgnuc-version=4.2.1
+    -fno-implicit-modules
+    -fskip-odr-check-in-gmf
+    -fcxx-exceptions
+    -fexceptions
+    -fcolor-diagnostics
+    -faddrsig
+    -fno-rounding-math
+    -mconstructor-aliases
+    -mllvm
+    -amdgpu-internalize-symbols
+    -fvisibility=hidden
+    -Wno-float-equal
+    -fgpu-flush-denormals-to-zero
+    -Wno-unused-parameter)
+
+  #TODO: The following flags are specific to 8-bit width types which are not integrated via CK yet.
+  # Add once that support is integrated
+  #check_cxx_compiler_flag("-fno-offload-uniform-block" HAS_NO_OFFLOAD_UNIFORM_BLOCK)
+  #if(HAS_NO_OFFLOAD_UNIFORM_BLOCK)
+  #  list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -fno-offload-uniform-block)
+  #endif()
+  #check_cxx_compiler_flag("-mllvm --lsr-drop-solution=1" HAS_LSR_DROP_SOLUTION)
+  #if(HAS_LSR_DROP_SOLUTION)
+  #  list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm --lsr-drop-solution=1)
+  #endif()
+  #check_cxx_compiler_flag("-mllvm -enable-post-misched=0" HAS_ENABLE_POST_MISCHED)
+  #if(HAS_ENABLE_POST_MISCHED)
+  #  list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm -enable-post-misched=0)
+  #endif()
+  #set(check-coerce)
+  #check_cxx_compiler_flag(" -mllvm -amdgpu-coerce-illegal-types=1" check-coerce)
+  #if(check-coerce)
+  #  list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm -amdgpu-coerce-illegal-types=1)
+  #endif()
+
+  list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm -amdgpu-early-inline-all=true)
+  list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm -amdgpu-function-calls=false)
+
+  # Additional CK compiler flags
+  set(CK_SDPA_EXTRA_HIPCC_OPTIONS
+    CK_ENABLE_BF16
+    CK_ENABLE_BF8
+    CK_ENABLE_FP16
+    CK_ENABLE_FP32
+    CK_ENABLE_FP64
+    CK_ENABLE_FP8
+    CK_ENABLE_INT8
+    CK_USE_FNUZ_FP8
+    CK_USE_GFX94
+    CK_USE_XDL
+    __HIP_PLATFORM_AMD__=1
+    __HIP_PLATFORM_HCC__=1
+    CK_TILE_FMHA_FWD_FAST_EXP2=1
+    CK_TILE_FMHA_FWD_SPLITKV_API=1
+    CK_TILE_FMHA_FWD_APPENDKV_API=1
+    CK_TILE_FMHA_FWD_PAGEDKV_API=1
+    __GCC_HAVE_DWARF2_CFI_ASM=1
+    USE_ROCM_CK_SDPA)
+
   message(STATUS "Generating CK kernel instances...")
   add_subdirectory(native/transformers/hip/flash_attn/ck)
-  file(GLOB flash_attention_hip_ck_hip "native/transformers/hip/flash_attn/ck/*.hip")
-  list(APPEND native_transformers_hip_hip ${flash_attention_hip_ck_hip})
   # FAv3 Generation
   add_subdirectory(native/transformers/hip/flash_attn/ck/fav_v3)
-  file(GLOB flash_attention_v3_hip "native/transformers/hip/flash_attn/ck/fav_v3/*.hip")
-  list(APPEND native_transformers_hip_hip ${flash_attention_v3_hip})
+  file(GLOB ck_sdpa_sources_hip
+    "native/transformers/hip/flash_attn/ck/*.hip"
+    "native/transformers/hip/flash_attn/ck/fav_v3/*.hip")
+
+  set_source_files_properties(${ck_sdpa_sources_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
+  hip_add_library(ck_sdpa STATIC
+    ${ck_sdpa_sources_hip}
+    HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${CK_SDPA_EXTRA_HIPCC_FLAGS})
+  set_target_properties(ck_sdpa PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  target_compile_definitions(ck_sdpa PUBLIC ${CK_SDPA_EXTRA_HIPCC_OPTIONS})
+  target_include_directories(ck_sdpa PUBLIC
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/example/ck_tile/01_fmha
+    ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/aiter/csrc/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/native/transformers/hip/flash_attn/ck
+  )
+
 endif()
 file(GLOB flash_attention_hip_aot_hip "native/transformers/hip/flash_attn/aot/*.hip")
 file(GLOB flash_attention_src_hip_hip "native/transformers/hip/flash_attn/src/*.hip")
diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/ck/launch_kernel_pt.hpp b/aten/src/ATen/native/transformers/hip/flash_attn/ck/launch_kernel_pt.hpp
index 400da17426f1d..f4e1ef71f5a98 100644
--- a/aten/src/ATen/native/transformers/hip/flash_attn/ck/launch_kernel_pt.hpp
+++ b/aten/src/ATen/native/transformers/hip/flash_attn/ck/launch_kernel_pt.hpp
@@ -8,9 +8,9 @@ namespace ck_tile {

 // Added by hipification to become a no-op on non supported architectures
-template
+template
 #if CK_TILE_USE_LAUNCH_BOUNDS
-__launch_bounds__(MaxThreadPerBlock, MinBlockPerCu)
+__launch_bounds__(Kernel::kBlockSize, MinBlockPerCu)
 #endif
 __global__ void kentry_pt(Args... args)
 {
@@ -29,14 +29,13 @@ __launch_bounds__(MaxThreadPerBlock, MinBlockPerCu)
 //
 // the "static __device__ operator()(some_arg)" is the entry point of KernelImpl
 //
-template
 CK_TILE_HOST auto make_kernel_pt(KernelImpl /*f*/, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
 {
-    const auto kernel = kentry_pt;
+    const auto kernel = kentry_pt;
     return [=](const stream_config& s) {
         kernel<<>>(args...);
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 4cd773bc16123..9c75baa0bf947 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -1762,6 +1762,10 @@ if(USE_ROCM)
   target_link_libraries(torch_hip PUBLIC torch_cpu_library ${Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS})
   target_link_libraries(torch_hip PRIVATE ${Caffe2_HIP_DEPENDENCY_LIBS})

+  if(USE_ROCM_CK_SDPA)
+    target_link_libraries(torch_hip PRIVATE ck_sdpa)
+  endif()
+
   if(USE_FBGEMM_GENAI)
     if(USE_ROCM)
       target_link_libraries(torch_hip PRIVATE fbgemm_genai)
diff --git a/third_party/composable_kernel b/third_party/composable_kernel
index 7fe50dc3da206..de61e55493826 160000
--- a/third_party/composable_kernel
+++ b/third_party/composable_kernel
@@ -1 +1 @@
-Subproject commit 7fe50dc3da2069d6645d9deb8c017a876472a977
+Subproject commit de61e554938265a5d17a1bba8c148457125e80cd
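The CMake changes above only control how the CK tile FMHA sources are compiled and linked into torch_hip; at the Python level the CK-backed kernels still sit behind the regular scaled_dot_product_attention dispatcher. A minimal sketch of how the flash backend can be exercised on such a build (assuming a ROCm build configured with USE_ROCM_CK_SDPA and a supported GPU; the shapes and dtypes below are illustrative and not taken from the patch):

    import torch
    import torch.nn.functional as F
    from torch.nn.attention import SDPBackend, sdpa_kernel

    # bf16 query/key/value on the ROCm ("cuda") device; batch/heads/seq/head_dim are arbitrary.
    q, k, v = (torch.randn(2, 8, 128, 64, device="cuda", dtype=torch.bfloat16)
               for _ in range(3))

    # Restrict dispatch to the flash-attention backend, which is the path a
    # CK-enabled ROCm build can serve with the CK tile kernels.
    with sdpa_kernel(SDPBackend.FLASH_ATTENTION):
        out = F.scaled_dot_product_attention(q, k, v, is_causal=True)

    print(out.shape, torch.backends.cuda.flash_sdp_enabled())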
From b477fb106f4350428c2e94ee94680a740ed6a52b Mon Sep 17 00:00:00 2001
From: Jeff Daily
Date: Tue, 9 Sep 2025 20:04:54 +0000
Subject: [PATCH 002/693] [ROCm] enable grouped gemm fallback (#162419)

Enables the bf16 grouped gemm alternative path as described in #161366.
The fast path will be enabled in the future through CK integration.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162419
Approved by: https://github.com/jeffdaily

Co-authored-by: Jeff Daily
---
 aten/src/ATen/native/cuda/Blas.cpp | 20 ++++++--------------
 test/test_matmul_cuda.py           |  6 ++----
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
index 23447c7e09b3f..1dab8c19c7006 100644
--- a/aten/src/ATen/native/cuda/Blas.cpp
+++ b/aten/src/ATen/native/cuda/Blas.cpp
@@ -1080,16 +1080,6 @@ static bool _scaled_mm_allowed_device(bool sm90_only=false, bool sm100_only=fals
 #endif
 }

-static bool _grouped_mm_allowed_device() {
-#ifdef USE_ROCM
-  return false;
-#else
-  auto dprops = at::cuda::getCurrentDeviceProperties();
-  // CUDA capability 8.0 and greater
-  return dprops->major >= 8;
-#endif
-}
-
 #ifdef USE_ROCM
 static bool _scaled_mm_is_fnuz() {
   return at::detail::getCUDAHooks().isGPUArch({"gfx942"});
 }
@@ -1786,14 +1776,19 @@ Tensor _grouped_mm_cuda(const Tensor& mat_a, const Tensor& mat_b,
 const std::optional& offs,
 const std::optional& bias,
 std::optional out_dtype) {
-#ifndef USE_ROCM
   _grouped_mm_validate_inputs(mat_a, mat_b, offs, bias, out_dtype);
   bool a_b_and_out_are_bf16 = (
     mat_a.dtype() == at::kBFloat16 &&
     mat_b.dtype() == at::kBFloat16 &&
     out_dtype.value_or(at::kBFloat16) == at::kBFloat16
   );
+#ifndef USE_ROCM
   bool use_fast_path = _scaled_mm_allowed_device(/*sm90_only*/true, /*sm100_only*/true) && a_b_and_out_are_bf16;
+#else
+  // _scaled_mm_allowed_device is used here within _grouped_mm_cuda which seems incorrect since scale is not used.
+  // the _grouped_mm_fallback should be safe for any ROCm GPU since it's just calling typical mm/bmm
+  bool use_fast_path = false;
+#endif
   const auto out_dtype_ = _resolve_grouped_mm_out_dtype(mat_a, mat_b, out_dtype);
   Tensor out = create_grouped_gemm_output_tensor(mat_a, mat_b, offs, out_dtype_);
   if (use_fast_path) {
@@ -1803,9 +1798,6 @@ std::optional out_dtype) {
     _grouped_mm_fallback(mat_a, mat_b, offs, bias, out_dtype, out);
   }
   return out;
-#else
-  TORCH_CHECK(false, "grouped gemm is not supported on ROCM")
-#endif
 }

 Tensor _bmm_dtype_cuda(const Tensor& batch1, const Tensor& batch2, const at::ScalarType out_dtype) {
diff --git a/test/test_matmul_cuda.py b/test/test_matmul_cuda.py
index f72274c407839..5d76ac383e8d8 100644
--- a/test/test_matmul_cuda.py
+++ b/test/test_matmul_cuda.py
@@ -316,7 +316,6 @@ def grouped_mm_helper(self, alist, blist, gOlist, agradlist, bgradlist, outlist)
             self.assertEqual(agrad, a.grad)
             self.assertEqual(bgrad, b.grad)

-    @unittest.skipIf(TEST_WITH_ROCM, "ROCm doesn't support CUTLASS")
     @xfailIfSM120OrLater
     @unittest.skipIf(not SM80OrLater, "Grouped gemm supported only on SM80 or greater")
     @parametrize("strided", [False, True])
@@ -355,7 +354,6 @@ def test_grouped_gemm_2d_2d(self, strided, a_row_major, b_row_major, dtype):
             start = offs_cpu[i]
         self.grouped_mm_helper(alist, blist, gO, agradlist, bgradlist, out)

-    @unittest.skipIf(TEST_WITH_ROCM, "ROCm doesn't support CUTLASS")
     @xfailIfSM120OrLater
     @unittest.skipIf(not SM80OrLater, "Grouped gemm supported only on SM80 or greater")
     @parametrize("strided", [False, True])
@@ -412,7 +410,6 @@ def test_grouped_gemm_2d_3d(self, strided, a_row_major, b_row_major, dtype):
         self.grouped_mm_helper(alist, b, gOlist, agradlist, bgradlist, outlist)

-    @unittest.skipIf(TEST_WITH_ROCM, "ROCm doesn't support CUTLASS")
     @xfailIfSM120OrLater
     @unittest.skipIf(not SM80OrLater, "Grouped gemm supported only on SM80 or greater")
     @parametrize("strided", [False, True])
@@ -447,7 +444,6 @@ def test_grouped_gemm_3d_3d(self, strided, a_row_major, b_row_major, dtype):
         out.backward(gO)
         self.grouped_mm_helper(a, b, gO, a.grad, b.grad, out)

-    @unittest.skipIf(TEST_WITH_ROCM, "ROCm doesn't support CUTLASS")
     @xfailIfSM120OrLater
     @unittest.skipIf(not SM80OrLater, "Grouped gemm supported only on SM80 or greater")
     @parametrize("strided", [False, True])
@@ -455,6 +451,8 @@ def test_grouped_gemm_3d_3d(self, strided, a_row_major, b_row_major, dtype):
     @parametrize("b_row_major", [False, True])
     @dtypes(torch.bfloat16, torch.float32, torch.float16)
     def test_grouped_gemm_3d_2d(self, strided, a_row_major, b_row_major, dtype):
+        if TEST_WITH_ROCM and a_row_major and b_row_major and dtype in [torch.bfloat16, torch.float16]:
+            self.skipTest("failed using hipblaslt on rocm 6.4.2")
         device = "cuda"
         s_int = int(strided)
         m, n, k, n_groups = 16, 32, 64, 4
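For context on what the new code path does: _grouped_mm_fallback decomposes the grouped matmul into ordinary mm/bmm calls, so any ROCm GPU that can run torch.mm can serve it. A minimal sketch of the user-visible behaviour this enables, assuming the 3D x 3D layout exercised by test_grouped_gemm_3d_3d (leading group dimension, regular matmul semantics within each group); torch._grouped_mm is a private API and its exact signature may differ between versions:

    import torch

    n_groups, m, k, n = 4, 16, 64, 32
    a = torch.randn(n_groups, m, k, device="cuda", dtype=torch.bfloat16)
    b = torch.randn(n_groups, k, n, device="cuda", dtype=torch.bfloat16)

    # On a ROCm build this now reaches the fallback instead of raising
    # "grouped gemm is not supported on ROCM".
    out = torch._grouped_mm(a, b)

    # The fallback is semantically one plain matmul per group.
    ref = torch.stack([a[g] @ b[g] for g in range(n_groups)])
    torch.testing.assert_close(out, ref, rtol=1e-2, atol=1e-2)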
From 14744e1ab2f3b9226cc2f7d6a05311826e34354a Mon Sep 17 00:00:00 2001
From: atalman
Date: Tue, 9 Sep 2025 20:38:15 +0000
Subject: [PATCH 003/693] [Release 2.9] Add compatibility matrix, Version Bump
 (#162526)

Release 2.9
1. Add release compatibility matrix
2. Add version bump for 2.10

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162526
Approved by: https://github.com/malfet
---
 RELEASE.md  | 1 +
 version.txt | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/RELEASE.md b/RELEASE.md
index 047bb10161f71..52371e73f0a6d 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -50,6 +50,7 @@ Following is the Release Compatibility Matrix for PyTorch releases:

 | PyTorch version | Python | C++ | Stable CUDA | Experimental CUDA | Stable ROCm |
 | --- | --- | --- | --- | --- | --- |
+| 2.9 | >=3.10, <=(3.14, 3.14t experimental) | C++17 | CUDA 12.6 (CUDNN 9.10.2.21), CUDA 12.8 (CUDNN 9.10.2.21) | CUDA 13.0 (CUDNN 9.13.0.50) | ROCm 6.4 |
 | 2.8 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 12.6 (CUDNN 9.10.2.21), CUDA 12.8 (CUDNN 9.10.2.21) | CUDA 12.9 (CUDNN 9.10.2.21) | ROCm 6.4 |
 | 2.7 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 11.8 (CUDNN 9.1.0.70), CUDA 12.6 (CUDNN 9.5.1.17) | CUDA 12.8 (CUDNN 9.7.1.26) | ROCm 6.3 |
 | 2.6 | >=3.9, <=3.13, (3.13t experimental) | C++17 | CUDA 11.8, CUDA 12.4 (CUDNN 9.1.0.70) | CUDA 12.6 (CUDNN 9.5.1.17) | ROCm 6.2.4 |
diff --git a/version.txt b/version.txt
index 03e905f0db5fe..1e8c33284d92d 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-2.9.0a0
+2.10.0a0

From 8922bbcaab18a462f3414c52904d7497a10ba8d8 Mon Sep 17 00:00:00 2001
From: Ke Wen
Date: Tue, 9 Sep 2025 09:21:47 -0700
Subject: [PATCH 004/693] Use same NVSHMEM version across CUDA builds (#162206)

#161321 bumped NVSHMEM version to 3.3.24 for CUDA 13, leaving CUDA 12 with
3.3.20. This PR bumps the NVSHMEM version to 3.3.24 for CUDA 12 as well.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162206
Approved by: https://github.com/tinglvv, https://github.com/Skylion007
---
 .../scripts/generate_binary_build_matrix.py   |  4 +--
 ...linux-aarch64-binary-manywheel-nightly.yml | 28 +++++++++----------
 .../generated-linux-binary-manywheel-main.yml |  2 +-
 ...nerated-linux-binary-manywheel-nightly.yml | 28 +++++++++----------
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
index 4a4f8a65f684d..a3e65b340f649 100644
--- a/.github/scripts/generate_binary_build_matrix.py
+++ b/.github/scripts/generate_binary_build_matrix.py
@@ -54,7 +54,7 @@
         "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'"
@@ -71,7 +71,7 @@
         "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-nvshmem-cu12==3.3.24;
platform_system == 'Linux' and platform_machine == 'x86_64' | " "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'" diff --git a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml index 860ee21cda6a7..8bbcf1138e46d 100644 --- a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml @@ -132,7 +132,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_10-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -178,7 +178,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_10-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -335,7 +335,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_11-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 
'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -381,7 +381,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_11-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' 
and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -538,7 +538,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_12-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -584,7 +584,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_12-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -741,7 +741,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -787,7 +787,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -944,7 +944,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13t-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ 
secrets.GITHUB_TOKEN }} @@ -990,7 +990,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13t-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1147,7 +1147,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' 
and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1193,7 +1193,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1350,7 +1350,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14t-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1396,7 +1396,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14t-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/generated-linux-binary-manywheel-main.yml b/.github/workflows/generated-linux-binary-manywheel-main.yml index ec08b2c78eb67..a33d84c057cc8 100644 --- a/.github/workflows/generated-linux-binary-manywheel-main.yml +++ b/.github/workflows/generated-linux-binary-manywheel-main.yml @@ -60,7 +60,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 
'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda12_8-test: # Testing diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index 8a581a1f21fe1..1fa68ad32f81b 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -127,7 +127,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_10-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_10-cuda12_6-test: # Testing @@ -193,7 +193,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_10-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_10-cuda12_8-test: # Testing @@ -719,7 +719,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_11-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_11-cuda12_6-test: # Testing @@ -785,7 +785,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_11-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_11-cuda12_8-test: # Testing @@ -1311,7 +1311,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda12_6-test: # Testing @@ -1377,7 +1377,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda12_8-test: # Testing @@ -1903,7 +1903,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' 
and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13-cuda12_6-test: # Testing @@ -1969,7 +1969,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; 
platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13-cuda12_8-test: # Testing @@ -2495,7 +2495,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13t-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 
'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13t-cuda12_6-test: # Testing @@ -2561,7 +2561,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13t-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13t-cuda12_8-test: # Testing @@ -3087,7 +3087,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14-cuda12_6-test: # Testing @@ -3153,7 +3153,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14-cuda12_8-test: # Testing @@ -3679,7 +3679,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: 
manywheel-py3_14t-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14t-cuda12_6-test: # Testing @@ -3745,7 +3745,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14t-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14t-cuda12_8-test: # Testing From e2545487de3dbbe663e3f0adb699547a14da0f6a Mon Sep 17 00:00:00 2001 From: Rob Timpe Date: Tue, 9 Sep 2025 17:05:41 +0000 Subject: [PATCH 005/693] [dynamo] Graph break on on user-defined class in compiled region (#161670) Currently, user-defined classes inside of a compiled frame will cause the whole frame to be skipped by dynamo. This change defers the Unsupported exception until the __build_class__ builtin is actually called, which allows a graph break to be inserted. 
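For illustration only (not part of the original commit message): a minimal sketch, assuming a recent PyTorch build with dynamo, of the behavior this change targets. Defining a class inside a torch.compile'd function reaches the __build_class__ builtin; previously dynamo gave up on the whole frame, while with this change it records a graph break at that point and still traces the surrounding code. The counter inspection below uses the usual torch._dynamo utilities and is an assumption about typical usage, not output quoted from this PR.

```python
# Minimal sketch (assumes a recent torch with dynamo); illustrates the behavior
# described above, it is not code taken from this patch.
import torch
from torch._dynamo.utils import counters


@torch.compile(backend="eager")
def fn(x):
    # Creating a user-defined class inside the compiled frame invokes the
    # __build_class__ builtin. Before this change dynamo skipped the whole
    # frame; afterwards it inserts a graph break here and keeps compiling
    # the code around it.
    class Point:
        def __init__(self, v):
            self.v = v

    return Point(x).v + 1


print(fn(torch.ones(3)))          # tensor([2., 2., 2.])
print(counters["graph_break"])    # expect an entry mentioning __build_class__
```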
Pull Request resolved: https://github.com/pytorch/pytorch/pull/161670 Approved by: https://github.com/williamwen42, https://github.com/guilhermeleobas --- test/dynamo/test_error_messages.py | 12 ++++++------ test/dynamo/test_misc.py | 16 ++++++++++++++++ ...p_True => AOTFxirTestCase.test_aoti_fx_const} | 0 .../CPython313-test_bool-BoolTest.test_blocked | 0 ...on313-test_bool-BoolTest.test_convert_to_bool | 0 ...t_collections-TestCollectionABCs.test_Mapping | 0 ...tions-TestCollectionABCs.test_Sequence_mixins | 0 ...-test_collections-TestCollectionABCs.test_Set | 0 ...ons-TestCollectionABCs.test_Set_from_iterable | 0 ...ABCs.test_Set_interoperability_with_real_sets | 0 ...ollections-TestCollectionABCs.test_issue16373 | 0 ...st_collections-TestCounter.test_copy_subclass | 0 ...medTuple.test_namedtuple_subclass_issue_24931 | 0 ...llections-TestOneTrickPonyABCs.test_Generator | 0 ...-test_contextlib-ClosingTestCase.test_closing | 0 ...contextlib-ClosingTestCase.test_closing_error | 0 ...rTestCase.test_contextmanager_except_stopiter | 0 ...textlib-ContextManagerTestCase.test_nokeepref | 0 ...tAbstractContextManager.test_exit_is_abstract | 0 ...textlib-TestAbstractContextManager.test_slots | 0 ...b-TestContextDecorator.test_decorating_method | 0 ...ntextlib-TestContextDecorator.test_typo_enter | 0 ...ontextlib-TestContextDecorator.test_typo_exit | 0 ...-TestExitStack.test_dont_reraise_RuntimeError | 0 ...t_contextlib-TestExitStack.test_enter_context | 0 ...xtlib-TestExitStack.test_enter_context_errors | 0 ...tStack.test_exit_exception_chaining_reference | 0 ...ack.test_exit_exception_explicit_none_context | 0 ...contextlib-TestExitStack.test_instance_bypass | 0 ...on313-test_contextlib-TestExitStack.test_push | 0 ...faultdict-TestDefaultDict.test_recursive_repr | 0 .../CPython313-test_dict-DictTest.test_bad_key | 0 ...st_dict-DictTest.test_copy_maintains_tracking | 0 ...ict-DictTest.test_dict_contain_use_after_free | 0 ...on313-test_dict-DictTest.test_dict_copy_order | 0 .../CPython313-test_dict-DictTest.test_eq | 0 ...ictTest.test_equal_operator_modifying_operand | 0 ...ictTest.test_errors_in_view_containment_check | 0 ...test_fromkeys_operator_modifying_dict_operand | 0 ....test_fromkeys_operator_modifying_set_operand | 0 .../CPython313-test_dict-DictTest.test_getitem | 0 ...3-test_dict-DictTest.test_init_use_after_free | 0 ...tTest.test_instance_dict_getattr_str_subclass | 0 ..._dict-DictTest.test_invalid_keyword_arguments | 0 ...n313-test_dict-DictTest.test_merge_and_mutate | 0 .../CPython313-test_dict-DictTest.test_missing | 0 ...on313-test_dict-DictTest.test_mutating_lookup | 0 ...t_object_set_item_single_instance_non_str_key | 0 ...tTest.test_oob_indexing_dictiter_iternextitem | 0 .../CPython313-test_dict-DictTest.test_pop | 0 .../CPython313-test_dict-DictTest.test_resize2 | 0 ...test_reverse_iterator_for_shared_shared_dicts | 0 ...CPython313-test_dict-DictTest.test_setdefault | 0 ...313-test_dict-DictTest.test_setdefault_atomic | 0 ...t_dict-DictTest.test_setitem_atomic_at_resize | 0 ...Test.test_splittable_to_generic_combinedtable | 0 ...313-test_dict-DictTest.test_splittable_update | 0 ...CPython313-test_dict-DictTest.test_str_nonstr | 0 ...thon313-test_dict-DictTest.test_views_mapping | 0 ..._float-GeneralFloatCases.test_floatconversion | 0 ...t-GeneralFloatCases.test_keywords_in_subclass | 0 ...eneralFloatCases.test_non_numeric_input_types | 0 ...313-test_float-HexFloatTestCase.test_subclass | 0 ...test_int-IntTestCases.test_int_base_indexable | 0 
...nt-IntTestCases.test_int_returns_int_subclass | 0 ...int-IntTestCases.test_int_subclass_with_index | 0 ...t_int-IntTestCases.test_int_subclass_with_int | 0 ...n313-test_int-IntTestCases.test_intconversion | 0 ...int-IntTestCases.test_non_numeric_input_types | 0 .../CPython313-test_iter-TestCase.test_3720 | 0 ...test_iter-TestCase.test_ref_counting_behavior | 0 ...thon313-test_iter-TestCase.test_stop_sequence | 0 ...-test_iter-TestCase.test_unicode_join_endcase | 0 ...ython313-test_list-ListTest.test_constructors | 0 ...hon313-test_list-ListTest.test_contains_order | 0 ...istTest.test_equal_operator_modifying_operand | 0 .../CPython313-test_list-ListTest.test_extend | 0 ...-test_list-ListTest.test_keywords_in_subclass | 0 ...313-test_list-ListTest.test_no_comdat_folding | 0 ...Python313-test_list-ListTest.test_repr_mutate | 0 .../CPython313-test_math-MathTests.testCeil | 0 .../CPython313-test_math-MathTests.testFloor | 0 ...Python313-test_math-MathTests.test_issue39871 | 0 ...on313-test_math-MathTests.test_sumprod_stress | 0 .../CPython313-test_math-MathTests.test_trunc | 0 ...ator-CCOperatorPickleTestCase.test_attrgetter | 0 ...or-CCOperatorPickleTestCase.test_methodcaller | 0 ...st_operator-COperatorTestCase.test_attrgetter | 0 ...on313-test_operator-COperatorTestCase.test_eq | 0 ...13-test_operator-COperatorTestCase.test_index | 0 ...-test_operator-COperatorTestCase.test_inplace | 0 ...t_operator-COperatorTestCase.test_length_hint | 0 ...on313-test_operator-COperatorTestCase.test_ne | 0 ...313-test_operator-COperatorTestCase.test_not_ | 0 ...13-test_operator-COperatorTestCase.test_truth | 0 ...tor-CPyOperatorPickleTestCase.test_attrgetter | 0 ...r-CPyOperatorPickleTestCase.test_methodcaller | 0 ...tor-PyCOperatorPickleTestCase.test_attrgetter | 0 ...r-PyCOperatorPickleTestCase.test_methodcaller | 0 ...t_operator-PyOperatorTestCase.test_attrgetter | 0 ...3-test_operator-PyOperatorTestCase.test_index | 0 ...test_operator-PyOperatorTestCase.test_inplace | 0 ..._operator-PyOperatorTestCase.test_length_hint | 0 ...-test_operator-PyOperatorTestCase.test_matmul | 0 ...operator-PyOperatorTestCase.test_methodcaller | 0 ...13-test_operator-PyOperatorTestCase.test_not_ | 0 ...or-PyPyOperatorPickleTestCase.test_attrgetter | 0 ...-PyPyOperatorPickleTestCase.test_methodcaller | 0 ...nBuiltinDictTests.test_delitem_hash_collision | 0 ...nBuiltinDictTests.test_highly_nested_subclass | 0 ...DictSubclassTests.test_delitem_hash_collision | 0 ...DictSubclassTests.test_highly_nested_subclass | 0 ...ythonOrderedDictSubclassTests.test_init_calls | 0 ....test_issue119004_change_linked_list_by_clear | 0 ..._issue119004_change_linked_list_by_delete_key | 0 ...ssTests.test_issue119004_change_size_by_clear | 0 ...ts.test_issue119004_change_size_by_delete_key | 0 ...ue119004_change_size_by_delete_key_in_dict_eq | 0 ...ythonOrderedDictSubclassTests.test_issue24347 | 0 ...ythonOrderedDictSubclassTests.test_issue24348 | 0 ...nOrderedDictTests.test_delitem_hash_collision | 0 ...nOrderedDictTests.test_highly_nested_subclass | 0 ..._dict-CPythonOrderedDictTests.test_init_calls | 0 ....test_issue119004_change_linked_list_by_clear | 0 ..._issue119004_change_linked_list_by_delete_key | 0 ...ctTests.test_issue119004_change_size_by_clear | 0 ...ts.test_issue119004_change_size_by_delete_key | 0 ...ue119004_change_size_by_delete_key_in_dict_eq | 0 ..._dict-CPythonOrderedDictTests.test_issue24347 | 0 ..._dict-CPythonOrderedDictTests.test_issue24348 | 0 ...DictSubclassTests.test_delitem_hash_collision | 0 
...DictSubclassTests.test_highly_nested_subclass | 0 ...ythonOrderedDictSubclassTests.test_init_calls | 0 ...ubclassTests.test_issue119004_attribute_error | 0 ...ythonOrderedDictSubclassTests.test_issue24347 | 0 ...ythonOrderedDictSubclassTests.test_issue24348 | 0 ...OrderedDictSubclassTests.test_overridden_init | 0 ...OrderedDictSubclassTests.test_override_update | 0 ...nOrderedDictTests.test_delitem_hash_collision | 0 ...nOrderedDictTests.test_highly_nested_subclass | 0 ...ct-PurePythonOrderedDictTests.test_init_calls | 0 ...redDictTests.test_issue119004_attribute_error | 0 ...ct-PurePythonOrderedDictTests.test_issue24347 | 0 ...ct-PurePythonOrderedDictTests.test_issue24348 | 0 ...rePythonOrderedDictTests.test_overridden_init | 0 ...rePythonOrderedDictTests.test_override_update | 0 ...aryOpsMutating_Set_Set.test_and_with_mutation | 0 ...naryOpsMutating_Set_Set.test_eq_with_mutation | 0 ...naryOpsMutating_Set_Set.test_ge_with_mutation | 0 ...naryOpsMutating_Set_Set.test_gt_with_mutation | 0 ...ryOpsMutating_Set_Set.test_iadd_with_mutation | 0 ...aryOpsMutating_Set_Set.test_ior_with_mutation | 0 ...ryOpsMutating_Set_Set.test_isub_with_mutation | 0 ...Mutating_Set_Set.test_iteration_with_mutation | 0 ...ryOpsMutating_Set_Set.test_ixor_with_mutation | 0 ...naryOpsMutating_Set_Set.test_le_with_mutation | 0 ...naryOpsMutating_Set_Set.test_lt_with_mutation | 0 ...naryOpsMutating_Set_Set.test_ne_with_mutation | 0 ...naryOpsMutating_Set_Set.test_or_with_mutation | 0 ...aryOpsMutating_Set_Set.test_sub_with_mutation | 0 ...aryOpsMutating_Set_Set.test_xor_with_mutation | 0 ...sMutating_Set_Subclass.test_and_with_mutation | 0 ...psMutating_Set_Subclass.test_eq_with_mutation | 0 ...psMutating_Set_Subclass.test_ge_with_mutation | 0 ...psMutating_Set_Subclass.test_gt_with_mutation | 0 ...Mutating_Set_Subclass.test_iadd_with_mutation | 0 ...sMutating_Set_Subclass.test_ior_with_mutation | 0 ...Mutating_Set_Subclass.test_isub_with_mutation | 0 ...ing_Set_Subclass.test_iteration_with_mutation | 0 ...Mutating_Set_Subclass.test_ixor_with_mutation | 0 ...psMutating_Set_Subclass.test_le_with_mutation | 0 ...psMutating_Set_Subclass.test_lt_with_mutation | 0 ...psMutating_Set_Subclass.test_ne_with_mutation | 0 ...psMutating_Set_Subclass.test_or_with_mutation | 0 ...sMutating_Set_Subclass.test_sub_with_mutation | 0 ...sMutating_Set_Subclass.test_xor_with_mutation | 0 ...sMutating_Subclass_Set.test_and_with_mutation | 0 ...psMutating_Subclass_Set.test_eq_with_mutation | 0 ...psMutating_Subclass_Set.test_ge_with_mutation | 0 ...psMutating_Subclass_Set.test_gt_with_mutation | 0 ...Mutating_Subclass_Set.test_iadd_with_mutation | 0 ...sMutating_Subclass_Set.test_ior_with_mutation | 0 ...Mutating_Subclass_Set.test_isub_with_mutation | 0 ...ing_Subclass_Set.test_iteration_with_mutation | 0 ...Mutating_Subclass_Set.test_ixor_with_mutation | 0 ...psMutating_Subclass_Set.test_le_with_mutation | 0 ...psMutating_Subclass_Set.test_lt_with_mutation | 0 ...psMutating_Subclass_Set.test_ne_with_mutation | 0 ...psMutating_Subclass_Set.test_or_with_mutation | 0 ...sMutating_Subclass_Set.test_sub_with_mutation | 0 ...sMutating_Subclass_Set.test_xor_with_mutation | 0 ...ting_Subclass_Subclass.test_and_with_mutation | 0 ...ating_Subclass_Subclass.test_eq_with_mutation | 0 ...ating_Subclass_Subclass.test_ge_with_mutation | 0 ...ating_Subclass_Subclass.test_gt_with_mutation | 0 ...ing_Subclass_Subclass.test_iadd_with_mutation | 0 ...ting_Subclass_Subclass.test_ior_with_mutation | 0 ...ing_Subclass_Subclass.test_isub_with_mutation | 0 
...ubclass_Subclass.test_iteration_with_mutation | 0 ...ing_Subclass_Subclass.test_ixor_with_mutation | 0 ...ating_Subclass_Subclass.test_le_with_mutation | 0 ...ating_Subclass_Subclass.test_lt_with_mutation | 0 ...ating_Subclass_Subclass.test_ne_with_mutation | 0 ...ating_Subclass_Subclass.test_or_with_mutation | 0 ...ting_Subclass_Subclass.test_sub_with_mutation | 0 ...ting_Subclass_Subclass.test_xor_with_mutation | 0 ...est_set-TestFrozenSet.test_container_iterator | 0 ...ython313-test_set-TestFrozenSet.test_deepcopy | 0 .../CPython313-test_set-TestFrozenSet.test_gc | 0 ...-TestFrozenSet.test_subclass_with_custom_hash | 0 ...TestFrozenSetSubclass.test_container_iterator | 0 ...-test_set-TestFrozenSetSubclass.test_deepcopy | 0 ...hon313-test_set-TestFrozenSetSubclass.test_gc | 0 ...stFrozenSetSubclass.test_keywords_in_subclass | 0 ...zenSetSubclass.test_subclass_with_custom_hash | 0 ...Set_Dict.test_difference_update_with_mutation | 0 ...tating_Set_Dict.test_difference_with_mutation | 0 ...t_Dict.test_intersection_update_with_mutation | 0 ...ting_Set_Dict.test_intersection_with_mutation | 0 ...tating_Set_Dict.test_isdisjoint_with_mutation | 0 ...Mutating_Set_Dict.test_issubset_with_mutation | 0 ...tating_Set_Dict.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ..._Dict.test_symmetric_difference_with_mutation | 0 ...odsMutating_Set_Dict.test_union_with_mutation | 0 ...dsMutating_Set_Dict.test_update_with_mutation | 0 ...Set_List.test_difference_update_with_mutation | 0 ...tating_Set_List.test_difference_with_mutation | 0 ...t_List.test_intersection_update_with_mutation | 0 ...ting_Set_List.test_intersection_with_mutation | 0 ...tating_Set_List.test_isdisjoint_with_mutation | 0 ...Mutating_Set_List.test_issubset_with_mutation | 0 ...tating_Set_List.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ..._List.test_symmetric_difference_with_mutation | 0 ...odsMutating_Set_List.test_union_with_mutation | 0 ...dsMutating_Set_List.test_update_with_mutation | 0 ..._Set_Set.test_difference_update_with_mutation | 0 ...utating_Set_Set.test_difference_with_mutation | 0 ...et_Set.test_intersection_update_with_mutation | 0 ...ating_Set_Set.test_intersection_with_mutation | 0 ...utating_Set_Set.test_isdisjoint_with_mutation | 0 ...sMutating_Set_Set.test_issubset_with_mutation | 0 ...utating_Set_Set.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...t_Set.test_symmetric_difference_with_mutation | 0 ...hodsMutating_Set_Set.test_union_with_mutation | 0 ...odsMutating_Set_Set.test_update_with_mutation | 0 ...Subclass.test_difference_update_with_mutation | 0 ...ng_Set_Subclass.test_difference_with_mutation | 0 ...bclass.test_intersection_update_with_mutation | 0 ..._Set_Subclass.test_intersection_with_mutation | 0 ...ng_Set_Subclass.test_isdisjoint_with_mutation | 0 ...ting_Set_Subclass.test_issubset_with_mutation | 0 ...ng_Set_Subclass.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...class.test_symmetric_difference_with_mutation | 0 ...utating_Set_Subclass.test_union_with_mutation | 0 ...tating_Set_Subclass.test_update_with_mutation | 0 ...lass_Set.test_difference_update_with_mutation | 0 ...ng_Subclass_Set.test_difference_with_mutation | 0 ...ss_Set.test_intersection_update_with_mutation | 0 ..._Subclass_Set.test_intersection_with_mutation | 0 ...ng_Subclass_Set.test_isdisjoint_with_mutation | 0 ...ting_Subclass_Set.test_issubset_with_mutation | 0 
...ng_Subclass_Set.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...s_Set.test_symmetric_difference_with_mutation | 0 ...utating_Subclass_Set.test_union_with_mutation | 0 ...tating_Subclass_Set.test_update_with_mutation | 0 ...Subclass.test_difference_update_with_mutation | 0 ...bclass_Subclass.test_difference_with_mutation | 0 ...bclass.test_intersection_update_with_mutation | 0 ...lass_Subclass.test_intersection_with_mutation | 0 ...bclass_Subclass.test_isdisjoint_with_mutation | 0 ...Subclass_Subclass.test_issubset_with_mutation | 0 ...bclass_Subclass.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...class.test_symmetric_difference_with_mutation | 0 ...ng_Subclass_Subclass.test_union_with_mutation | 0 ...g_Subclass_Subclass.test_update_with_mutation | 0 ...n313-test_set-TestSet.test_container_iterator | 0 .../CPython313-test_set-TestSet.test_deepcopy | 0 .../CPython313-test_set-TestSet.test_gc | 0 ...CPython313-test_set-TestSet.test_rich_compare | 0 ...st_set-TestSet.test_subclass_with_custom_hash | 0 ...t_set-TestSetSubclass.test_container_iterator | 0 ...hon313-test_set-TestSetSubclass.test_deepcopy | 0 .../CPython313-test_set-TestSetSubclass.test_gc | 0 ...set-TestSetSubclass.test_keywords_in_subclass | 0 ...13-test_set-TestSetSubclass.test_rich_compare | 0 ...estSetSubclass.test_subclass_with_custom_hash | 0 ...-test_set-TestWeirdBugs.test_merge_and_mutate | 0 ...CPython313-test_sort-TestBase.testStressfully | 0 .../CPython313-test_sort-TestBugs.test_bug453523 | 0 ...rateSortUndecorate.test_key_with_mutating_del | 0 ...tOptimizedCompares.test_unsafe_object_compare | 0 ...hon313-test_tuple-TupleTest.test_constructors | 0 ...n313-test_tuple-TupleTest.test_contains_order | 0 ...est_tuple-TupleTest.test_keywords_in_subclass | 0 ...3-test_tuple-TupleTest.test_no_comdat_folding | 0 ...n313-test_tuple-TupleTest.test_track_subtypes | 0 ...CPython313-test_userdict-UserDictTest.test_eq | 0 ...est_userlist-UserListTest.test_contains_order | 0 ...est_with-ExceptionalTestCase.testErrorsInBool | 0 ...-ExceptionalTestCase.testRaisedStopIteration2 | 0 ...with-FailureTestCase.testEnterAttributeError1 | 0 ...with-FailureTestCase.testEnterAttributeError2 | 0 ...t_with-FailureTestCase.testExitAttributeError | 0 .../TestAutograd.test_anomaly_detect_nan | 0 .../TestAutograd.test_autograd_print_tensor | 0 ...eckpointing_without_reentrant_with_context_fn | 0 ...ograd.test_custom_autograd_repeated_grad_grad | 0 .../TestAutograd.test_inplace_not_requires_grad | 0 .../TestAutograd.test_lobpcg | 0 .../TestAutograd.test_mark_non_differentiable | 0 ...estAutograd.test_mark_non_differentiable_none | 0 ...d.test_naughty_autograd_function_stashing_ctx | 0 .../TestAutograd.test_return_leaf_inplace | 0 ...test_const_fold_basic_one_attr_name_collision | 0 ...t_const_fold_basic_one_attr_no_name_collision | 0 .../TestConstFold.test_const_fold_basic_two_attr | 0 ...ld.test_const_fold_basic_two_attr_three_input | 0 ....test_const_fold_has_inlined_call_module_node | 0 .../TestConstFold.test_const_fold_module_attr | 0 ...estConstFold.test_const_fold_submod_hierarchy | 0 ...tConstFold.test_const_fold_unused_placeholder | 0 .../TestConstFold.test_dict_output | 0 .../TestConstFold.test_fold_module | 0 .../TestConstFold.test_three_outputs | 0 .../TestConstFold.test_two_outputs | 0 ...raced.test_cond_merge_graph_preserves_ph_meta | 0 ...ched_branch_output_dynamic_True_backend_eager | 0 ...test_cond_symint_operands_requires_grad_False | 0 
....test_cond_symint_operands_requires_grad_True | 0 ...rolFlowTraced.test_while_loop_autograd_simple | 0 ...nJIT.test_cpp_frontend_module_python_inter_op | 0 ...cpp_frontend_module_python_inter_op_with_cuda | 0 ...not_raised_when_exception_source_is_submodule | 0 ...mericSuiteCoreAPIs.test_user_defined_function | 0 .../TestFlag.test_writeable_any_base | 0 ...estIndexing.test_broken_sequence_not_nd_index | 0 ...etrizations_and_params_single_param_swap_True | 0 .../TestPrivateUse1.test_backend_type_methods | 0 .../TestPythonDispatch.test_maybe_tuple_bug | 0 .../TestPythonDispatch.test_set_data | 0 ...tch.test_wrapper_subclass_extra_dispatch_keys | 0 ...st_functional_call_member_reference_stateless | 0 ...t_functional_call_member_reference_torch_func | 0 ...lies_module_and_param_specific_decorators_cpu | 0 ...nDeviceTypeCPU.test_ops_composition_names_cpu | 0 ..._applies_op_and_param_specific_decorators_cpu | 0 .../TestTorch.test_as_subclass | 0 .../TestTorch.test_storage_cycle_via_slots | 0 .../TestTorch.test_storage_finalizer_dealloc | 0 .../TestTorch.test_storage_slot_dealloc | 0 .../TestTorch.test_tensor_cycle_via_slots | 0 .../TestTorch.test_tensor_finalizer_dealloc | 0 .../TestTorch.test_tensor_slot_dealloc | 0 ...TestTorchFunctionMode.test_custom_device_type | 0 ...orchFunctionMode.test_disable_enable_subclass | 0 ...tTorchFunctionMode.test_disable_subclass_mode | 0 .../TestTorchFunctionMode.test_factory_override | 0 .../TestTorchFunctionOverride.test_pow_rpow | 0 .../TestAutograd.test_naughty_anomaly_access | 0 test/dynamo_skips/TestPythonPytree.test_key_str | 1 + ...chFunctionMode.test_disable_subclass_not_mode | 0 .../TestTorch.test_storage_cycle_via_slots | 0 .../TestTorch.test_storage_finalizer_dealloc | 0 .../TestTorch.test_storage_slot_dealloc | 0 .../TestTorch.test_tensor_cycle_via_slots | 0 .../TestTorch.test_tensor_finalizer_dealloc | 0 .../TestTorch.test_tensor_slot_dealloc | 0 torch/_dynamo/symbolic_convert.py | 10 +--------- 374 files changed, 24 insertions(+), 15 deletions(-) rename test/dynamo_expected_failures/{TestNNParametrization.test_new_spectral_norm_forward_swap_True => AOTFxirTestCase.test_aoti_fx_const} (100%) create mode 100644 test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked create mode 100644 test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestNamedTuple.test_namedtuple_subclass_issue_24931 create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing create mode 100644 
test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push create mode 100644 test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing create mode 100644 
test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping create mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion create mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types create mode 100644 test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types create mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 create mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior create mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence create mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate 
create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass create mode 100644 
test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init create mode 100644 
test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation 
create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate create mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully create mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 create mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del create mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes create mode 100644 test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order create mode 100644 test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool create mode 100644 test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 create mode 
100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 create mode 100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 create mode 100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError create mode 100644 test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan create mode 100644 test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor create mode 100644 test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn create mode 100644 test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad create mode 100644 test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad create mode 100644 test/dynamo_expected_failures/TestAutograd.test_lobpcg create mode 100644 test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable create mode 100644 test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none create mode 100644 test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx create mode 100644 test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder create mode 100644 test/dynamo_expected_failures/TestConstFold.test_dict_output create mode 100644 test/dynamo_expected_failures/TestConstFold.test_fold_module create mode 100644 test/dynamo_expected_failures/TestConstFold.test_three_outputs create mode 100644 test/dynamo_expected_failures/TestConstFold.test_two_outputs create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple create mode 100644 test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op create mode 100644 test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda create mode 100644 test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule create mode 100644 test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function create mode 100644 test/dynamo_expected_failures/TestFlag.test_writeable_any_base create mode 100644 
test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index
create mode 100644 test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True
create mode 100644 test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods
create mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug
create mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_set_data
create mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys
create mode 100644 test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless
create mode 100644 test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func
create mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu
create mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu
create mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu
create mode 100644 test/dynamo_expected_failures/TestTorch.test_as_subclass
create mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots
create mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc
create mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc
create mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots
create mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc
create mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc
create mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type
create mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass
create mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode
create mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override
create mode 100644 test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow
create mode 100644 test/dynamo_skips/TestAutograd.test_naughty_anomaly_access
create mode 100644 test/dynamo_skips/TestPythonPytree.test_key_str
create mode 100644 test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode
create mode 100644 test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots
create mode 100644 test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc
create mode 100644 test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc
create mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots
create mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc
create mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc

diff --git a/test/dynamo/test_error_messages.py b/test/dynamo/test_error_messages.py
index 847f3a6fd2166..081ceb5065dfa 100644
--- a/test/dynamo/test_error_messages.py
+++ b/test/dynamo/test_error_messages.py
@@ -726,14 +726,14 @@ class Foo:
             Unsupported,
             lambda: torch.compile(fn, backend="eager", fullgraph=True)(),
             """\
-LOAD_BUILD_CLASS bytecode not supported
-  Explanation: Dynamo does not support tracing classes that are defined in the compiled region.
-  Hint: Move the class definition out of the compiled region.
-  Hint: It may be possible to write Dynamo tracing rules for this code. Please report an issue to PyTorch if you encounter this graph break often and it is causing performance issues.
+Attempted to call function marked as skipped
+  Explanation: Dynamo does not know how to trace the builtin `builtins.__build_class__.` This function is either a Python builtin (e.g. _warnings.warn) or a third-party C/C++ Python extension (perhaps created with pybind).
+  Hint: If it is a Python builtin, please file an issue on GitHub so the PyTorch team can add support for it and see the next case for a workaround.
+  Hint: If it is a third-party C/C++ Python extension, please either wrap it into a PyTorch-understood custom operator (see https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html for more details) or, if it is traceable, use `torch.compiler.allow_in_graph`.

-  Developer debug context:
+  Developer debug context: module: builtins, qualname: __build_class__, skip reason:

- For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0075.html
+ For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0007.html

 from user code:
   File "test_error_messages.py", line N, in fn

diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
index b7fb01be17152..c9a2a0730b08a 100644
--- a/test/dynamo/test_misc.py
+++ b/test/dynamo/test_misc.py
@@ -12684,6 +12684,22 @@ def f(*args, **kwargs):
         self.assertRaises(Unsupported, f, [])
         self.assertRaises(Unsupported, f, "1 + j")

+    def test_compiled_class_graph_break(self):
+        counter = CompileCounter()
+
+        @torch.compile(backend=counter, fullgraph=False)
+        def f(x):
+            x += 1
+
+            class C:
+                pass
+
+            return x.sin()
+
+        x = torch.randn(3)
+        f(x)
+        self.assertEqual(counter.frame_count, 2)
+

 class MiscTestsPyTree(torch._inductor.test_case.TestCase):
     @parametrize_pytree_module

diff --git a/test/dynamo_expected_failures/TestNNParametrization.test_new_spectral_norm_forward_swap_True b/test/dynamo_expected_failures/AOTFxirTestCase.test_aoti_fx_const
similarity index 100%
rename from test/dynamo_expected_failures/TestNNParametrization.test_new_spectral_norm_forward_swap_True
rename to test/dynamo_expected_failures/AOTFxirTestCase.test_aoti_fx_const
diff --git a/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked b/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool b/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set new
file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass b/test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestNamedTuple.test_namedtuple_subclass_issue_24931 b/test/dynamo_expected_failures/CPython313-test_collections-TestNamedTuple.test_namedtuple_subclass_issue_24931 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator b/test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing b/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error b/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter b/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref b/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract b/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots b/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr b/test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order 
b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem 
b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass b/test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass 
b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil b/test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor b/test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne 
b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter 
b/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq 
b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d 
diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff 
--git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator new file mode 100644 index 0000000000000..e69de29bb2d1d 
diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate b/test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully b/test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 b/test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del b/test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare b/test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes 
b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq b/test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool b/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 b/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan b/test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor b/test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn b/test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad b/test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad b/test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_lobpcg b/test/dynamo_expected_failures/TestAutograd.test_lobpcg new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable b/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none 
b/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx b/test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace b/test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node b/test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr b/test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy b/test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder b/test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_dict_output b/test/dynamo_expected_failures/TestConstFold.test_dict_output new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_fold_module b/test/dynamo_expected_failures/TestConstFold.test_fold_module new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_three_outputs b/test/dynamo_expected_failures/TestConstFold.test_three_outputs new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_two_outputs b/test/dynamo_expected_failures/TestConstFold.test_two_outputs new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple b/test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op b/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda b/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule b/test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function b/test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestFlag.test_writeable_any_base b/test/dynamo_expected_failures/TestFlag.test_writeable_any_base new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index b/test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True b/test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods b/test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug b/test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestPythonDispatch.test_set_data b/test/dynamo_expected_failures/TestPythonDispatch.test_set_data new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys b/test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless b/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func b/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_as_subclass b/test/dynamo_expected_failures/TestTorch.test_as_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots b/test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc b/test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc b/test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots b/test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc b/test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc b/test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type b/test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass 
b/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode b/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override b/test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow b/test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_skips/TestAutograd.test_naughty_anomaly_access b/test/dynamo_skips/TestAutograd.test_naughty_anomaly_access new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_skips/TestPythonPytree.test_key_str b/test/dynamo_skips/TestPythonPytree.test_key_str new file mode 100644 index 0000000000000..a8d6b4d65e03c --- /dev/null +++ b/test/dynamo_skips/TestPythonPytree.test_key_str @@ -0,0 +1 @@ +Passes under python 3.10, fails under 3.13 diff --git a/test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode b/test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots b/test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc b/test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc b/test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots b/test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc b/test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc b/test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py index 4dd1321a5057d..beebea05a0e3e 100644 --- a/torch/_dynamo/symbolic_convert.py +++ b/torch/_dynamo/symbolic_convert.py @@ -3290,15 +3290,7 @@ def LOAD_ASSERTION_ERROR(self, inst: Instruction) -> None: self.push(self.load_builtin_from_argval("AssertionError")) def LOAD_BUILD_CLASS(self, inst: Instruction) -> None: - unimplemented_v2( - gb_type="LOAD_BUILD_CLASS bytecode not supported", - context="", - explanation="Dynamo does not support tracing classes that are defined in the compiled region.", - hints=[ - "Move the class definition out of the compiled region.", - *graph_break_hints.SUPPORTABLE, - ], - ) + self.push(self.load_builtin_from_argval("__build_class__")) UNARY_POSITIVE = 
stack_op(operator.pos) UNARY_NEGATIVE = stack_op(operator.neg) From 4d66a3b89472cce75808f064e3af3fb4a81b806f Mon Sep 17 00:00:00 2001 From: Howard Huang Date: Tue, 9 Sep 2025 11:13:59 -0700 Subject: [PATCH 006/693] fix Dtensor doc link (#162494) Small fix for https://docs.pytorch.org/docs/main/distributed.tensor.parallel.html image now it is: image Pull Request resolved: https://github.com/pytorch/pytorch/pull/162494 Approved by: https://github.com/XilunWu --- docs/source/distributed.tensor.parallel.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/distributed.tensor.parallel.md b/docs/source/distributed.tensor.parallel.md index 6083699493ff0..fbfb6f1be2b8c 100644 --- a/docs/source/distributed.tensor.parallel.md +++ b/docs/source/distributed.tensor.parallel.md @@ -5,7 +5,7 @@ # Tensor Parallelism - torch.distributed.tensor.parallel Tensor Parallelism(TP) is built on top of the PyTorch DistributedTensor -(DTensor)[https://github.com/pytorch/pytorch/blob/main/torch/distributed/tensor/README.md] +([DTensor](https://github.com/pytorch/pytorch/blob/main/torch/distributed/tensor/README.md)) and provides different parallelism styles: Colwise, Rowwise, and Sequence Parallelism. :::{warning} @@ -89,4 +89,4 @@ Parallelized cross-entropy loss computation (loss parallelism), is supported via ``` :::{warning} The loss_parallel API is experimental and subject to change. -::: \ No newline at end of file +::: From b498299953f18ceab9e4a8b233eb04c24c0f3811 Mon Sep 17 00:00:00 2001 From: SandishKumarHN Date: Tue, 9 Sep 2025 22:23:02 +0000 Subject: [PATCH 007/693] 154849 Add support to handle SIGUSR1 and SIGUSR2 in multiprocessing (#160690) Fixes #154849 This change addresses the request to add support for SIGUSR1 and SIGUSR2 signals in torchrun for SLURM environments. The change supports these signals through the configurable `TORCHELASTIC_SIGNALS_TO_HANDLE` environment variable and the signals_to_handle parameter of the launcher API. Tests: For validation purposes: test_signal_handling.py, simple_test_api_signal_handling.py; Unit tests: for launcher changes: launcher/test_api.py, for api changes: multiprocessing/test_api.py; E2E: test_run.py Pull Request resolved: https://github.com/pytorch/pytorch/pull/160690 Approved by: https://github.com/fduwjj --- .../elastic/multiprocessing/test_api.py | 331 ++++++++++++++++++ test/distributed/launcher/test_api.py | 100 ++++++ test/distributed/test_run.py | 90 +++++ .../elastic/multiprocessing/api.py | 34 +- torch/distributed/launcher/api.py | 6 + torch/distributed/run.py | 12 + 6 files changed, 568 insertions(+), 5 deletions(-) create mode 100644 test/distributed/elastic/multiprocessing/test_api.py create mode 100644 test/distributed/launcher/test_api.py create mode 100644 test/distributed/test_run.py diff --git a/test/distributed/elastic/multiprocessing/test_api.py b/test/distributed/elastic/multiprocessing/test_api.py new file mode 100644 index 0000000000000..400ec96832c62 --- /dev/null +++ b/test/distributed/elastic/multiprocessing/test_api.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python3 +# Owner(s): ["oncall: r2p"] + +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree.
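As a usage sketch (not part of this patch's diffs), the new knob can be set from Python through the LaunchConfig/launch_agent API exercised by the tests below, via torchrun's new --signals-to-handle flag, or via the TORCHELASTIC_SIGNALS_TO_HANDLE environment variable; the entrypoint name train.py is a placeholder:

# Minimal sketch, assuming the LaunchConfig/launch_agent API shown in this patch.
from torch.distributed.launcher.api import LaunchConfig, launch_agent

config = LaunchConfig(
    min_nodes=1,
    max_nodes=1,
    nproc_per_node=8,
    # New field added by this patch; launch_agent exports it as
    # TORCHELASTIC_SIGNALS_TO_HANDLE before starting the workers.
    signals_to_handle="SIGTERM,SIGINT,SIGUSR1,SIGUSR2",
)
# Equivalent CLI: torchrun --signals-to-handle=SIGTERM,SIGINT,SIGUSR1,SIGUSR2 train.py
launch_agent(config, "train.py", [])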
+ +import os +import signal +from unittest.mock import MagicMock, patch + +from torch.distributed.elastic.multiprocessing.api import ( + _terminate_process_handler, + PContext, + SignalException, +) +from torch.testing._internal.common_utils import run_tests, TestCase + + +class SignalHandlingTest(TestCase): + def setUp(self): + # Save original environment variable if it exists + self.original_signals_env = os.environ.get( + "TORCHELASTIC_SIGNALS_TO_HANDLE", None + ) + + def tearDown(self): + # Restore original environment variable + if self.original_signals_env is not None: + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] = self.original_signals_env + elif "TORCHELASTIC_SIGNALS_TO_HANDLE" in os.environ: + del os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] + + def test_terminate_process_handler(self): + """Test that the terminate process handler raises SignalException with the correct signal.""" + signum = signal.SIGTERM + with self.assertRaises(SignalException) as cm: + _terminate_process_handler(signum, None) + + self.assertEqual(cm.exception.sigval, signal.SIGTERM) + # The signal is represented as a number in the string representation + self.assertIn(f"Process {os.getpid()} got signal: {signum}", str(cm.exception)) + + @patch("torch.distributed.elastic.multiprocessing.api.threading") + @patch("torch.distributed.elastic.multiprocessing.api.signal") + @patch("torch.distributed.elastic.multiprocessing.api.logger") + def test_start_registers_default_signals( + self, mock_logger, mock_signal, mock_threading + ): + """Test that the start method registers the default signals.""" + # Setup + mock_threading.current_thread.return_value = ( + mock_threading.main_thread.return_value + ) + mock_pcontext = MagicMock(spec=PContext) + # Mock the _stdout_tail and _stderr_tail attributes + mock_pcontext._stdout_tail = MagicMock() + mock_pcontext._stderr_tail = MagicMock() + + # Remove environment variable if it exists to test default behavior + if "TORCHELASTIC_SIGNALS_TO_HANDLE" in os.environ: + del os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] + + # Call the start method + PContext.start(mock_pcontext) + + # Verify that the signal handler was registered for the default signals + expected_signals = ["SIGTERM", "SIGINT", "SIGHUP", "SIGQUIT"] + + # Count the number of calls to signal.signal + signal_calls = 0 + for call in mock_signal.signal.call_args_list: + args, _ = call + sig, handler = args + signal_calls += 1 + # Verify the handler is our _terminate_process_handler + self.assertEqual(handler, _terminate_process_handler) + + # Verify we registered the expected number of signals + self.assertEqual(signal_calls, len(expected_signals)) + + # Verify _start was called + mock_pcontext._start.assert_called_once() + # Verify _stdout_tail.start() and _stderr_tail.start() were called + mock_pcontext._stdout_tail.start.assert_called_once() + mock_pcontext._stderr_tail.start.assert_called_once() + + @patch("torch.distributed.elastic.multiprocessing.api.threading") + @patch("torch.distributed.elastic.multiprocessing.api.signal") + @patch("torch.distributed.elastic.multiprocessing.api.logger") + def test_start_registers_custom_signals( + self, mock_logger, mock_signal, mock_threading + ): + """Test that the start method registers custom signals from the environment variable.""" + # Setup + mock_threading.current_thread.return_value = ( + mock_threading.main_thread.return_value + ) + mock_pcontext = MagicMock(spec=PContext) + # Mock the _stdout_tail and _stderr_tail attributes + mock_pcontext._stdout_tail = MagicMock() 
+ mock_pcontext._stderr_tail = MagicMock() + + # Set custom signals in the environment variable + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] = "SIGTERM,SIGUSR1,SIGUSR2" + + # Call the start method + PContext.start(mock_pcontext) + + # Verify that the signal handler was registered for the custom signals + expected_signals = ["SIGTERM", "SIGUSR1", "SIGUSR2"] + + # Count the number of calls to signal.signal + signal_calls = 0 + for call in mock_signal.signal.call_args_list: + args, _ = call + sig, handler = args + signal_calls += 1 + # Verify the handler is our _terminate_process_handler + self.assertEqual(handler, _terminate_process_handler) + + # Verify we registered the expected number of signals + self.assertEqual(signal_calls, len(expected_signals)) + + # Verify _start was called + mock_pcontext._start.assert_called_once() + + @patch("torch.distributed.elastic.multiprocessing.api.threading") + @patch("torch.distributed.elastic.multiprocessing.api.signal") + @patch("torch.distributed.elastic.multiprocessing.api.logger") + def test_start_handles_invalid_signals( + self, mock_logger, mock_signal, mock_threading + ): + """Test that the start method handles invalid signals gracefully.""" + # Setup + mock_threading.current_thread.return_value = ( + mock_threading.main_thread.return_value + ) + mock_pcontext = MagicMock(spec=PContext) + # Mock the _stdout_tail and _stderr_tail attributes + mock_pcontext._stdout_tail = MagicMock() + mock_pcontext._stderr_tail = MagicMock() + + # Set invalid signals in the environment variable + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] = "SIGTERM,INVALID_SIGNAL" + + # Mock the signal module to not have the INVALID_SIGNAL attribute + # but have SIGTERM + mock_signal.SIGTERM = signal.SIGTERM + # Remove INVALID_SIGNAL attribute if it exists + if hasattr(mock_signal, "INVALID_SIGNAL"): + delattr(mock_signal, "INVALID_SIGNAL") + + # Call the start method + PContext.start(mock_pcontext) + + # Verify that the warning was logged for the invalid signal + # The exact message may vary, so let's check if warning was called with INVALID_SIGNAL + warning_calls = [ + call + for call in mock_logger.warning.call_args_list + if "INVALID_SIGNAL" in str(call) + ] + self.assertTrue(len(warning_calls) > 0, "Expected warning about INVALID_SIGNAL") + + # Verify _start was called + mock_pcontext._start.assert_called_once() + + @patch("torch.distributed.elastic.multiprocessing.api.threading") + @patch("torch.distributed.elastic.multiprocessing.api.signal") + @patch("torch.distributed.elastic.multiprocessing.api.logger") + def test_start_handles_windows_signals( + self, mock_logger, mock_signal, mock_threading + ): + """Test that the start method handles Windows-specific signal behavior.""" + # Setup + mock_threading.current_thread.return_value = ( + mock_threading.main_thread.return_value + ) + mock_pcontext = MagicMock(spec=PContext) + # Mock the _stdout_tail and _stderr_tail attributes + mock_pcontext._stdout_tail = MagicMock() + mock_pcontext._stderr_tail = MagicMock() + + # Set signals including ones not supported on Windows + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] = "SIGTERM,SIGHUP,SIGUSR1" + + # Mock signal attributes + mock_signal.SIGTERM = signal.SIGTERM + mock_signal.SIGHUP = signal.SIGHUP + mock_signal.SIGUSR1 = signal.SIGUSR1 + + # Mock IS_WINDOWS to be True + with patch("torch.distributed.elastic.multiprocessing.api.IS_WINDOWS", True): + # Mock signal.signal to raise RuntimeError for Windows-unsupported signals + def signal_side_effect(sig, handler): + if sig in 
[signal.SIGHUP, signal.SIGUSR1]: + raise RuntimeError("Signal not supported on Windows") + + mock_signal.signal.side_effect = signal_side_effect + + # Call the start method + PContext.start(mock_pcontext) + + # Verify that the info was logged for the unsupported signals + # Check if any info calls contain the expected messages + info_calls = [str(call) for call in mock_logger.info.call_args_list] + sighup_logged = any( + "SIGHUP" in call and "Windows" in call for call in info_calls + ) + sigusr1_logged = any( + "SIGUSR1" in call and "Windows" in call for call in info_calls + ) + + self.assertTrue( + sighup_logged, + f"Expected SIGHUP Windows message in info calls: {info_calls}", + ) + self.assertTrue( + sigusr1_logged, + f"Expected SIGUSR1 Windows message in info calls: {info_calls}", + ) + + # Verify _start was called + mock_pcontext._start.assert_called_once() + + @patch("torch.distributed.elastic.multiprocessing.api.threading") + @patch("torch.distributed.elastic.multiprocessing.api.logger") + def test_start_not_main_thread(self, mock_logger, mock_threading): + """Test that the start method warns when not called from the main thread.""" + # Setup + mock_threading.current_thread.return_value = MagicMock() # Not the main thread + mock_threading.main_thread.return_value = MagicMock() + mock_pcontext = MagicMock(spec=PContext) + # Mock the _stdout_tail and _stderr_tail attributes + mock_pcontext._stdout_tail = MagicMock() + mock_pcontext._stderr_tail = MagicMock() + + # Call the start method + PContext.start(mock_pcontext) + + # Verify that the warning was logged + mock_logger.warning.assert_called_with( + "Failed to register signal handlers since torchelastic is running on a child thread. " + "This could lead to orphaned worker processes if the torchrun is terminated." 
+ ) + + # Verify _start was called + mock_pcontext._start.assert_called_once() + + @patch("torch.distributed.elastic.multiprocessing.api.threading") + @patch("torch.distributed.elastic.multiprocessing.api.signal") + @patch("torch.distributed.elastic.multiprocessing.api.logger") + def test_start_supports_sigusr1_and_sigusr2( + self, mock_logger, mock_signal, mock_threading + ): + """Test that the start method properly supports SIGUSR1 and SIGUSR2 signals.""" + # Setup + mock_threading.current_thread.return_value = ( + mock_threading.main_thread.return_value + ) + mock_pcontext = MagicMock(spec=PContext) + # Mock the _stdout_tail and _stderr_tail attributes + mock_pcontext._stdout_tail = MagicMock() + mock_pcontext._stderr_tail = MagicMock() + + # Set environment variable to include SIGUSR1 and SIGUSR2 + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] = "SIGUSR1,SIGUSR2" + + # Mock signal attributes to have SIGUSR1 and SIGUSR2 + mock_signal.SIGUSR1 = signal.SIGUSR1 + mock_signal.SIGUSR2 = signal.SIGUSR2 + + # Call the start method + PContext.start(mock_pcontext) + + # Verify that signal.signal was called for both SIGUSR1 and SIGUSR2 + signal_calls = mock_signal.signal.call_args_list + registered_signals = [ + call[0][0] for call in signal_calls + ] # Extract the signal from each call + + # Verify both SIGUSR1 and SIGUSR2 were registered + self.assertIn( + signal.SIGUSR1, registered_signals, "SIGUSR1 should be registered" + ) + self.assertIn( + signal.SIGUSR2, registered_signals, "SIGUSR2 should be registered" + ) + + # Verify the correct handler was registered for both signals + for call in signal_calls: + sig, handler = call[0] + if sig in [signal.SIGUSR1, signal.SIGUSR2]: + self.assertEqual( + handler, + _terminate_process_handler, + f"Signal {sig} should use _terminate_process_handler", + ) + + # Verify that info messages were logged for successful registration + info_calls = [str(call) for call in mock_logger.info.call_args_list] + sigusr1_logged = any( + "SIGUSR1" in call and "Registered signal handler" in call + for call in info_calls + ) + sigusr2_logged = any( + "SIGUSR2" in call and "Registered signal handler" in call + for call in info_calls + ) + + self.assertTrue( + sigusr1_logged, + f"Expected SIGUSR1 registration message in info calls: {info_calls}", + ) + self.assertTrue( + sigusr2_logged, + f"Expected SIGUSR2 registration message in info calls: {info_calls}", + ) + + # Verify _start was called + mock_pcontext._start.assert_called_once() + # Verify _stdout_tail.start() and _stderr_tail.start() were called + mock_pcontext._stdout_tail.start.assert_called_once() + mock_pcontext._stderr_tail.start.assert_called_once() + + +if __name__ == "__main__": + run_tests() diff --git a/test/distributed/launcher/test_api.py b/test/distributed/launcher/test_api.py new file mode 100644 index 0000000000000..e6e778fe2ff32 --- /dev/null +++ b/test/distributed/launcher/test_api.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +# Owner(s): ["oncall: r2p"] + +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +from unittest.mock import MagicMock, patch + +from torch.distributed.launcher.api import launch_agent, LaunchConfig +from torch.testing._internal.common_utils import run_tests, TestCase + + +class LauncherApiTest(TestCase): + def setUp(self): + # Save original environment variable if it exists + self.original_signals_env = os.environ.get( + "TORCHELASTIC_SIGNALS_TO_HANDLE", None + ) + + def tearDown(self): + # Restore original environment variable + if self.original_signals_env is not None: + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] = self.original_signals_env + elif "TORCHELASTIC_SIGNALS_TO_HANDLE" in os.environ: + del os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] + + @patch("torch.distributed.launcher.api.LocalElasticAgent") + @patch("torch.distributed.launcher.api.rdzv_registry.get_rendezvous_handler") + def test_launch_agent_sets_signals_env_var(self, mock_get_handler, mock_agent): + """Test that launch_agent sets the TORCHELASTIC_SIGNALS_TO_HANDLE environment variable.""" + # Setup + config = LaunchConfig( + min_nodes=1, + max_nodes=1, + nproc_per_node=1, + signals_to_handle="SIGTERM,SIGUSR1,SIGUSR2", + ) + entrypoint = "dummy_script.py" + args = [] + + # Make sure the environment variable doesn't exist before the test + if "TORCHELASTIC_SIGNALS_TO_HANDLE" in os.environ: + del os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] + + # Mock agent.run() to return a MagicMock + mock_agent_instance = MagicMock() + mock_agent_instance.run.return_value = MagicMock( + is_failed=lambda: False, return_values={} + ) + mock_agent.return_value = mock_agent_instance + + # Call launch_agent + launch_agent(config, entrypoint, args) + + # Verify that the environment variable was set correctly + self.assertEqual( + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"], "SIGTERM,SIGUSR1,SIGUSR2" + ) + + @patch("torch.distributed.launcher.api.LocalElasticAgent") + @patch("torch.distributed.launcher.api.rdzv_registry.get_rendezvous_handler") + def test_launch_agent_default_signals(self, mock_get_handler, mock_agent): + """Test that launch_agent uses the default signals if not specified.""" + # Setup + config = LaunchConfig( + min_nodes=1, + max_nodes=1, + nproc_per_node=1, + # Not specifying signals_to_handle, should use default + ) + entrypoint = "dummy_script.py" + args = [] + + # Make sure the environment variable doesn't exist before the test + if "TORCHELASTIC_SIGNALS_TO_HANDLE" in os.environ: + del os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] + + # Mock agent.run() to return a MagicMock + mock_agent_instance = MagicMock() + mock_agent_instance.run.return_value = MagicMock( + is_failed=lambda: False, return_values={} + ) + mock_agent.return_value = mock_agent_instance + + # Call launch_agent + launch_agent(config, entrypoint, args) + + # Verify that the environment variable was set to the default value + self.assertEqual( + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"], + "SIGTERM,SIGINT,SIGHUP,SIGQUIT", + ) + + +if __name__ == "__main__": + run_tests() diff --git a/test/distributed/test_run.py b/test/distributed/test_run.py new file mode 100644 index 0000000000000..659241dbcbe99 --- /dev/null +++ b/test/distributed/test_run.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# Owner(s): ["oncall: r2p"] + +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +from unittest.mock import MagicMock, patch + +import torch.distributed.run as run +from torch.distributed.launcher.api import launch_agent, LaunchConfig +from torch.testing._internal.common_utils import run_tests, TestCase + + +class RunTest(TestCase): + def setUp(self): + # Save original environment variable if it exists + self.original_signals_env = os.environ.get( + "TORCHELASTIC_SIGNALS_TO_HANDLE", None + ) + + def tearDown(self): + # Restore original environment variable + if self.original_signals_env is not None: + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] = self.original_signals_env + elif "TORCHELASTIC_SIGNALS_TO_HANDLE" in os.environ: + del os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] + + def test_signals_to_handle_default(self): + """Test that the default value for signals_to_handle is correctly set.""" + parser = run.get_args_parser() + args = parser.parse_args(["dummy_script.py"]) + self.assertEqual(args.signals_to_handle, "SIGTERM,SIGINT,SIGHUP,SIGQUIT") + + def test_signals_to_handle_custom(self): + """Test that a custom value for signals_to_handle is correctly parsed.""" + parser = run.get_args_parser() + args = parser.parse_args( + ["--signals-to-handle=SIGTERM,SIGUSR1,SIGUSR2", "dummy_script.py"] + ) + self.assertEqual(args.signals_to_handle, "SIGTERM,SIGUSR1,SIGUSR2") + + def test_config_from_args_signals_to_handle(self): + """Test that the signals_to_handle argument is correctly passed to LaunchConfig.""" + parser = run.get_args_parser() + args = parser.parse_args( + ["--signals-to-handle=SIGTERM,SIGUSR1,SIGUSR2", "dummy_script.py"] + ) + config, _, _ = run.config_from_args(args) + self.assertEqual(config.signals_to_handle, "SIGTERM,SIGUSR1,SIGUSR2") + + @patch("torch.distributed.launcher.api.LocalElasticAgent") + @patch("torch.distributed.launcher.api.rdzv_registry.get_rendezvous_handler") + def test_launch_agent_sets_environment_variable(self, mock_get_handler, mock_agent): + """Test that launch_agent sets the TORCHELASTIC_SIGNALS_TO_HANDLE environment variable.""" + # Setup + config = LaunchConfig( + min_nodes=1, + max_nodes=1, + nproc_per_node=1, + signals_to_handle="SIGTERM,SIGUSR1,SIGUSR2", + ) + entrypoint = "dummy_script.py" + args = [] + + # Make sure the environment variable doesn't exist before the test + if "TORCHELASTIC_SIGNALS_TO_HANDLE" in os.environ: + del os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] + + # Mock agent.run() to return a MagicMock + mock_agent_instance = MagicMock() + mock_agent_instance.run.return_value = MagicMock( + is_failed=lambda: False, return_values={} + ) + mock_agent.return_value = mock_agent_instance + + # Call launch_agent + launch_agent(config, entrypoint, args) + + # Verify that the environment variable was set correctly + self.assertEqual( + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"], "SIGTERM,SIGUSR1,SIGUSR2" + ) + + +if __name__ == "__main__": + run_tests() diff --git a/torch/distributed/elastic/multiprocessing/api.py b/torch/distributed/elastic/multiprocessing/api.py index ed3ea86b0f2aa..a088b9e82412c 100644 --- a/torch/distributed/elastic/multiprocessing/api.py +++ b/torch/distributed/elastic/multiprocessing/api.py @@ -477,11 +477,35 @@ def __init__( def start(self) -> None: """Start processes using parameters defined in the constructor.""" if threading.current_thread() is threading.main_thread(): - signal.signal(signal.SIGTERM, _terminate_process_handler) - signal.signal(signal.SIGINT, _terminate_process_handler) - if not IS_WINDOWS: - signal.signal(signal.SIGHUP, _terminate_process_handler) - 
signal.signal(signal.SIGQUIT, _terminate_process_handler) + # Register signal handlers for the signals specified in the environment variable + signals_to_handle = os.environ.get( + "TORCHELASTIC_SIGNALS_TO_HANDLE", "SIGTERM,SIGINT,SIGHUP,SIGQUIT" + ) + signal_list = signals_to_handle.split(",") + + for sig_name in signal_list: + try: + sig = getattr(signal, sig_name.strip()) + signal.signal(sig, _terminate_process_handler) + logger.info("Registered signal handler for %s", sig_name) + except (AttributeError, ValueError) as e: + logger.warning( + "Failed to register signal handler for %s: %s", sig_name, e + ) + except RuntimeError as e: + if IS_WINDOWS and sig_name.strip() in [ + "SIGHUP", + "SIGQUIT", + "SIGUSR1", + "SIGUSR2", + ]: + logger.info( + "Signal %s is not supported on Windows, skipping", sig_name + ) + else: + logger.warning( + "Failed to register signal handler for %s: %s", sig_name, e + ) else: logger.warning( "Failed to register signal handlers since torchelastic is running on a child thread. " diff --git a/torch/distributed/launcher/api.py b/torch/distributed/launcher/api.py index acf23b27ca2a6..cde1dc1750d2a 100644 --- a/torch/distributed/launcher/api.py +++ b/torch/distributed/launcher/api.py @@ -6,6 +6,7 @@ # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import os import sys import uuid from dataclasses import dataclass, field @@ -95,6 +96,7 @@ class LaunchConfig: local_addr: Optional[str] = None event_log_handler: str = "null" numa_options: Optional[NumaOptions] = None + signals_to_handle: str = "SIGTERM,SIGINT,SIGHUP,SIGQUIT" def __post_init__(self): default_timeout = 900 @@ -240,6 +242,7 @@ def launch_agent( "metrics_cfg": config.metrics_cfg, "event_log_handler": config.event_log_handler, "numa_options": config.numa_options, + "signals_to_handle": config.signals_to_handle, }, ) @@ -255,6 +258,9 @@ def launch_agent( master_addr, master_port = _get_addr_and_port(rdzv_parameters) + # Set the signals to handle in the environment variable + os.environ["TORCHELASTIC_SIGNALS_TO_HANDLE"] = config.signals_to_handle + spec = WorkerSpec( role=config.role, local_world_size=config.nproc_per_node, diff --git a/torch/distributed/run.py b/torch/distributed/run.py index 2738191f0e379..d7bedb4335c24 100644 --- a/torch/distributed/run.py +++ b/torch/distributed/run.py @@ -645,6 +645,17 @@ def get_args_parser() -> ArgumentParser: featuring a single L3 cache per socket.""", ) + parser.add_argument( + "--signals-to-handle", + "--signals_to_handle", + action=env, + type=str, + default="SIGTERM,SIGINT,SIGHUP,SIGQUIT", + help="Comma-separated list of signals to handle and forward to subprocesses. " + "Default: SIGTERM,SIGINT,SIGHUP,SIGQUIT. " + "Common additional signals: SIGUSR1,SIGUSR2 (used in SLURM environments).", + ) + # # Positional arguments. 
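A minimal usage sketch of the new option (not part of the diff above; the worker entrypoint and rendezvous settings below are assumptions, shown only to illustrate how --signals-to-handle / LaunchConfig.signals_to_handle would be exercised):

    # Assumed CLI form, using the --signals-to-handle flag introduced in this patch:
    #   torchrun --nnodes=1 --nproc-per-node=2 --signals-to-handle=SIGTERM,SIGUSR1,SIGUSR2 train.py
    #
    # Equivalent programmatic use through the existing launcher API:
    from torch.distributed.launcher.api import LaunchConfig, elastic_launch

    def train_fn():
        # Placeholder worker entrypoint; real training code is assumed here.
        pass

    config = LaunchConfig(
        min_nodes=1,
        max_nodes=1,
        nproc_per_node=2,
        # Exported by launch_agent as TORCHELASTIC_SIGNALS_TO_HANDLE (see launcher/api.py above),
        # then read by MultiprocessContext.start() to register the handlers.
        signals_to_handle="SIGTERM,SIGUSR1,SIGUSR2",
    )
    elastic_launch(config, train_fn)()  # rendezvous configuration omitted in this sketch
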
# @@ -861,6 +872,7 @@ def config_from_args(args) -> tuple[LaunchConfig, Union[Callable, str], list[str logs_specs=logs_specs, event_log_handler=args.event_log_handler, numa_options=numa_options, + signals_to_handle=args.signals_to_handle, ) with_python = not args.no_python From d2393c2d7da03a1523a12e6f80edb6bd7b464ec5 Mon Sep 17 00:00:00 2001 From: Andy Lugo Date: Tue, 9 Sep 2025 22:30:12 +0000 Subject: [PATCH 008/693] [ROCm] Integrate AITER Fav3 fwd kernels (#160105) Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/160105 Approved by: https://github.com/jeffdaily --- .../hip/flash_attn/ck/fav_v3/CMakeLists.txt | 31 +++++++++++++++++-- .../hip/flash_attn/ck/mha_fwd_ck.hip | 12 +++++-- .../hip/flash_attn/ck/mha_varlen_fwd_ck.hip | 2 +- third_party/aiter | 2 +- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/ck/fav_v3/CMakeLists.txt b/aten/src/ATen/native/transformers/hip/flash_attn/ck/fav_v3/CMakeLists.txt index cccf026690dc0..19d2930f31777 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/ck/fav_v3/CMakeLists.txt +++ b/aten/src/ATen/native/transformers/hip/flash_attn/ck/fav_v3/CMakeLists.txt @@ -1,13 +1,22 @@ include(CMakePrintHelpers) # Generate AITER/CK Asm code +execute_process( + COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/py_itfs_cu/fmha_v3_fwd_kernel_generate.py --output_dir ${CMAKE_CURRENT_LIST_DIR} + RESULT_VARIABLE ret +) + +if(ret AND NOT ret EQUAL 0) + message( FATAL_ERROR "Failed to generate FAv3 fwd CK Kernels") +endif() + execute_process( COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/py_itfs_cu/fmha_v3_bwd_kernel_generate.py --receipt 1 --output_dir ${CMAKE_CURRENT_LIST_DIR} RESULT_VARIABLE ret ) if(ret AND NOT ret EQUAL 0) - message( FATAL_ERROR "Failed to generate FAv3 CK Kernels") + message( FATAL_ERROR "Failed to generate FAv3 bwd CK Kernels") endif() execute_process( @@ -15,6 +24,24 @@ execute_process( RESULT_VARIABLE ret ) +if(ret AND NOT ret EQUAL 0) + message( FATAL_ERROR "Failed to generate FAv3 bwd api") +endif() + +execute_process( + COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/cpp_itfs/mha_fwd_generate.py --receipt 6 --output_dir ${CMAKE_CURRENT_LIST_DIR} + RESULT_VARIABLE ret +) + +if(ret AND NOT ret EQUAL 0) + message( FATAL_ERROR "Failed to generate FAv3 fwd api") +endif() # Change file extensions to .hip -execute_process(COMMAND bash -c "for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done") +execute_process(COMMAND bash -c "for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done" + RESULT_VARIABLE ret +) + +if(ret AND NOT ret EQUAL 0) + message( FATAL_ERROR "Failed to modify aiter file extensions") +endif() diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_fwd_ck.hip b/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_fwd_ck.hip index 05f97414acdd8..492e0e4f3498b 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_fwd_ck.hip +++ b/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_fwd_ck.hip @@ -3,6 +3,7 @@ ******************************************************************************/ #include +#include #include #include @@ -141,7 +142,7 @@ fmha_fwd_args get_ck_fmha_fwd_args(bool has_lse, mask.left, mask.right, static_cast(mask.type), - -1, // min_seqlen_q + 0, // min_seqlen_q p_dropout, has_dropout_randval, drop_seed_offset}; @@ -350,7 +351,14 @@ mha_fwd_ck(const at::Tensor &q, // 
batch_size x seqlen_q x softmax_scale, p_dropout, drop_seed_offset); - float t = fmha_fwd(traits, args, stream_config); + float t = aiter::mha_fwd(args, // mha_fwd_args args + stream_config, // stream_config + q_dtype_str, // q_dtype_str + false, // is_group_mode + mask.type, // mask_type + attn_bias_.has_value() ? bias_enum::elementwise_bias : bias_enum::no_bias, + has_lse, // has_lse + true); // use_ext_asm TORCH_CHECK(t >= 0, "invalid argument for fmha_fwd"); } else { diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_varlen_fwd_ck.hip b/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_varlen_fwd_ck.hip index ee6261df8a91a..d4ffc2ec424c5 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_varlen_fwd_ck.hip +++ b/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_varlen_fwd_ck.hip @@ -349,7 +349,7 @@ mha_varlen_fwd_ck(const at::Tensor &q, // total_q x num_heads p_dropout, drop_seed_offset); float t = fmha_fwd(traits, args, stream_config); - TORCH_CHECK(t >= 0, "invalid argument for fmha_fwd"); + TORCH_CHECK(t >= 0, "invalid argument for fmha_varlen_fwd"); } else { // If seqlen_k == 0, then we have an empty tensor. We need to set the output to 0. diff --git a/third_party/aiter b/third_party/aiter index 01aae101b9e5e..28918c0e68d28 160000 --- a/third_party/aiter +++ b/third_party/aiter @@ -1 +1 @@ -Subproject commit 01aae101b9e5e94d6c16a9514c9fb8df99c93150 +Subproject commit 28918c0e68d28e2c217e0f05344d178877ba611e From a3e26d1727461a5b8f44065ce5b0dbdfd5541e64 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Tue, 9 Sep 2025 23:40:26 +0000 Subject: [PATCH 009/693] Revert "[dynamo] Graph break on on user-defined class in compiled region (#161670)" This reverts commit e2545487de3dbbe663e3f0adb699547a14da0f6a. 
Reverted https://github.com/pytorch/pytorch/pull/161670 on behalf of https://github.com/huydhn due to Sorry for reverting your change but it is failing a trunk test ([comment](https://github.com/pytorch/pytorch/pull/161670#issuecomment-3272626391)) --- test/dynamo/test_error_messages.py | 12 ++++++------ test/dynamo/test_misc.py | 16 ---------------- .../CPython313-test_bool-BoolTest.test_blocked | 0 ...on313-test_bool-BoolTest.test_convert_to_bool | 0 ...t_collections-TestCollectionABCs.test_Mapping | 0 ...tions-TestCollectionABCs.test_Sequence_mixins | 0 ...-test_collections-TestCollectionABCs.test_Set | 0 ...ons-TestCollectionABCs.test_Set_from_iterable | 0 ...ABCs.test_Set_interoperability_with_real_sets | 0 ...ollections-TestCollectionABCs.test_issue16373 | 0 ...st_collections-TestCounter.test_copy_subclass | 0 ...medTuple.test_namedtuple_subclass_issue_24931 | 0 ...llections-TestOneTrickPonyABCs.test_Generator | 0 ...-test_contextlib-ClosingTestCase.test_closing | 0 ...contextlib-ClosingTestCase.test_closing_error | 0 ...rTestCase.test_contextmanager_except_stopiter | 0 ...textlib-ContextManagerTestCase.test_nokeepref | 0 ...tAbstractContextManager.test_exit_is_abstract | 0 ...textlib-TestAbstractContextManager.test_slots | 0 ...b-TestContextDecorator.test_decorating_method | 0 ...ntextlib-TestContextDecorator.test_typo_enter | 0 ...ontextlib-TestContextDecorator.test_typo_exit | 0 ...-TestExitStack.test_dont_reraise_RuntimeError | 0 ...t_contextlib-TestExitStack.test_enter_context | 0 ...xtlib-TestExitStack.test_enter_context_errors | 0 ...tStack.test_exit_exception_chaining_reference | 0 ...ack.test_exit_exception_explicit_none_context | 0 ...contextlib-TestExitStack.test_instance_bypass | 0 ...on313-test_contextlib-TestExitStack.test_push | 0 ...faultdict-TestDefaultDict.test_recursive_repr | 0 .../CPython313-test_dict-DictTest.test_bad_key | 0 ...st_dict-DictTest.test_copy_maintains_tracking | 0 ...ict-DictTest.test_dict_contain_use_after_free | 0 ...on313-test_dict-DictTest.test_dict_copy_order | 0 .../CPython313-test_dict-DictTest.test_eq | 0 ...ictTest.test_equal_operator_modifying_operand | 0 ...ictTest.test_errors_in_view_containment_check | 0 ...test_fromkeys_operator_modifying_dict_operand | 0 ....test_fromkeys_operator_modifying_set_operand | 0 .../CPython313-test_dict-DictTest.test_getitem | 0 ...3-test_dict-DictTest.test_init_use_after_free | 0 ...tTest.test_instance_dict_getattr_str_subclass | 0 ..._dict-DictTest.test_invalid_keyword_arguments | 0 ...n313-test_dict-DictTest.test_merge_and_mutate | 0 .../CPython313-test_dict-DictTest.test_missing | 0 ...on313-test_dict-DictTest.test_mutating_lookup | 0 ...t_object_set_item_single_instance_non_str_key | 0 ...tTest.test_oob_indexing_dictiter_iternextitem | 0 .../CPython313-test_dict-DictTest.test_pop | 0 .../CPython313-test_dict-DictTest.test_resize2 | 0 ...test_reverse_iterator_for_shared_shared_dicts | 0 ...CPython313-test_dict-DictTest.test_setdefault | 0 ...313-test_dict-DictTest.test_setdefault_atomic | 0 ...t_dict-DictTest.test_setitem_atomic_at_resize | 0 ...Test.test_splittable_to_generic_combinedtable | 0 ...313-test_dict-DictTest.test_splittable_update | 0 ...CPython313-test_dict-DictTest.test_str_nonstr | 0 ...thon313-test_dict-DictTest.test_views_mapping | 0 ..._float-GeneralFloatCases.test_floatconversion | 0 ...t-GeneralFloatCases.test_keywords_in_subclass | 0 ...eneralFloatCases.test_non_numeric_input_types | 0 ...313-test_float-HexFloatTestCase.test_subclass | 0 
...test_int-IntTestCases.test_int_base_indexable | 0 ...nt-IntTestCases.test_int_returns_int_subclass | 0 ...int-IntTestCases.test_int_subclass_with_index | 0 ...t_int-IntTestCases.test_int_subclass_with_int | 0 ...n313-test_int-IntTestCases.test_intconversion | 0 ...int-IntTestCases.test_non_numeric_input_types | 0 .../CPython313-test_iter-TestCase.test_3720 | 0 ...test_iter-TestCase.test_ref_counting_behavior | 0 ...thon313-test_iter-TestCase.test_stop_sequence | 0 ...-test_iter-TestCase.test_unicode_join_endcase | 0 ...ython313-test_list-ListTest.test_constructors | 0 ...hon313-test_list-ListTest.test_contains_order | 0 ...istTest.test_equal_operator_modifying_operand | 0 .../CPython313-test_list-ListTest.test_extend | 0 ...-test_list-ListTest.test_keywords_in_subclass | 0 ...313-test_list-ListTest.test_no_comdat_folding | 0 ...Python313-test_list-ListTest.test_repr_mutate | 0 .../CPython313-test_math-MathTests.testCeil | 0 .../CPython313-test_math-MathTests.testFloor | 0 ...Python313-test_math-MathTests.test_issue39871 | 0 ...on313-test_math-MathTests.test_sumprod_stress | 0 .../CPython313-test_math-MathTests.test_trunc | 0 ...ator-CCOperatorPickleTestCase.test_attrgetter | 0 ...or-CCOperatorPickleTestCase.test_methodcaller | 0 ...st_operator-COperatorTestCase.test_attrgetter | 0 ...on313-test_operator-COperatorTestCase.test_eq | 0 ...13-test_operator-COperatorTestCase.test_index | 0 ...-test_operator-COperatorTestCase.test_inplace | 0 ...t_operator-COperatorTestCase.test_length_hint | 0 ...on313-test_operator-COperatorTestCase.test_ne | 0 ...313-test_operator-COperatorTestCase.test_not_ | 0 ...13-test_operator-COperatorTestCase.test_truth | 0 ...tor-CPyOperatorPickleTestCase.test_attrgetter | 0 ...r-CPyOperatorPickleTestCase.test_methodcaller | 0 ...tor-PyCOperatorPickleTestCase.test_attrgetter | 0 ...r-PyCOperatorPickleTestCase.test_methodcaller | 0 ...t_operator-PyOperatorTestCase.test_attrgetter | 0 ...3-test_operator-PyOperatorTestCase.test_index | 0 ...test_operator-PyOperatorTestCase.test_inplace | 0 ..._operator-PyOperatorTestCase.test_length_hint | 0 ...-test_operator-PyOperatorTestCase.test_matmul | 0 ...operator-PyOperatorTestCase.test_methodcaller | 0 ...13-test_operator-PyOperatorTestCase.test_not_ | 0 ...or-PyPyOperatorPickleTestCase.test_attrgetter | 0 ...-PyPyOperatorPickleTestCase.test_methodcaller | 0 ...nBuiltinDictTests.test_delitem_hash_collision | 0 ...nBuiltinDictTests.test_highly_nested_subclass | 0 ...DictSubclassTests.test_delitem_hash_collision | 0 ...DictSubclassTests.test_highly_nested_subclass | 0 ...ythonOrderedDictSubclassTests.test_init_calls | 0 ....test_issue119004_change_linked_list_by_clear | 0 ..._issue119004_change_linked_list_by_delete_key | 0 ...ssTests.test_issue119004_change_size_by_clear | 0 ...ts.test_issue119004_change_size_by_delete_key | 0 ...ue119004_change_size_by_delete_key_in_dict_eq | 0 ...ythonOrderedDictSubclassTests.test_issue24347 | 0 ...ythonOrderedDictSubclassTests.test_issue24348 | 0 ...nOrderedDictTests.test_delitem_hash_collision | 0 ...nOrderedDictTests.test_highly_nested_subclass | 0 ..._dict-CPythonOrderedDictTests.test_init_calls | 0 ....test_issue119004_change_linked_list_by_clear | 0 ..._issue119004_change_linked_list_by_delete_key | 0 ...ctTests.test_issue119004_change_size_by_clear | 0 ...ts.test_issue119004_change_size_by_delete_key | 0 ...ue119004_change_size_by_delete_key_in_dict_eq | 0 ..._dict-CPythonOrderedDictTests.test_issue24347 | 0 ..._dict-CPythonOrderedDictTests.test_issue24348 | 0 
...DictSubclassTests.test_delitem_hash_collision | 0 ...DictSubclassTests.test_highly_nested_subclass | 0 ...ythonOrderedDictSubclassTests.test_init_calls | 0 ...ubclassTests.test_issue119004_attribute_error | 0 ...ythonOrderedDictSubclassTests.test_issue24347 | 0 ...ythonOrderedDictSubclassTests.test_issue24348 | 0 ...OrderedDictSubclassTests.test_overridden_init | 0 ...OrderedDictSubclassTests.test_override_update | 0 ...nOrderedDictTests.test_delitem_hash_collision | 0 ...nOrderedDictTests.test_highly_nested_subclass | 0 ...ct-PurePythonOrderedDictTests.test_init_calls | 0 ...redDictTests.test_issue119004_attribute_error | 0 ...ct-PurePythonOrderedDictTests.test_issue24347 | 0 ...ct-PurePythonOrderedDictTests.test_issue24348 | 0 ...rePythonOrderedDictTests.test_overridden_init | 0 ...rePythonOrderedDictTests.test_override_update | 0 ...aryOpsMutating_Set_Set.test_and_with_mutation | 0 ...naryOpsMutating_Set_Set.test_eq_with_mutation | 0 ...naryOpsMutating_Set_Set.test_ge_with_mutation | 0 ...naryOpsMutating_Set_Set.test_gt_with_mutation | 0 ...ryOpsMutating_Set_Set.test_iadd_with_mutation | 0 ...aryOpsMutating_Set_Set.test_ior_with_mutation | 0 ...ryOpsMutating_Set_Set.test_isub_with_mutation | 0 ...Mutating_Set_Set.test_iteration_with_mutation | 0 ...ryOpsMutating_Set_Set.test_ixor_with_mutation | 0 ...naryOpsMutating_Set_Set.test_le_with_mutation | 0 ...naryOpsMutating_Set_Set.test_lt_with_mutation | 0 ...naryOpsMutating_Set_Set.test_ne_with_mutation | 0 ...naryOpsMutating_Set_Set.test_or_with_mutation | 0 ...aryOpsMutating_Set_Set.test_sub_with_mutation | 0 ...aryOpsMutating_Set_Set.test_xor_with_mutation | 0 ...sMutating_Set_Subclass.test_and_with_mutation | 0 ...psMutating_Set_Subclass.test_eq_with_mutation | 0 ...psMutating_Set_Subclass.test_ge_with_mutation | 0 ...psMutating_Set_Subclass.test_gt_with_mutation | 0 ...Mutating_Set_Subclass.test_iadd_with_mutation | 0 ...sMutating_Set_Subclass.test_ior_with_mutation | 0 ...Mutating_Set_Subclass.test_isub_with_mutation | 0 ...ing_Set_Subclass.test_iteration_with_mutation | 0 ...Mutating_Set_Subclass.test_ixor_with_mutation | 0 ...psMutating_Set_Subclass.test_le_with_mutation | 0 ...psMutating_Set_Subclass.test_lt_with_mutation | 0 ...psMutating_Set_Subclass.test_ne_with_mutation | 0 ...psMutating_Set_Subclass.test_or_with_mutation | 0 ...sMutating_Set_Subclass.test_sub_with_mutation | 0 ...sMutating_Set_Subclass.test_xor_with_mutation | 0 ...sMutating_Subclass_Set.test_and_with_mutation | 0 ...psMutating_Subclass_Set.test_eq_with_mutation | 0 ...psMutating_Subclass_Set.test_ge_with_mutation | 0 ...psMutating_Subclass_Set.test_gt_with_mutation | 0 ...Mutating_Subclass_Set.test_iadd_with_mutation | 0 ...sMutating_Subclass_Set.test_ior_with_mutation | 0 ...Mutating_Subclass_Set.test_isub_with_mutation | 0 ...ing_Subclass_Set.test_iteration_with_mutation | 0 ...Mutating_Subclass_Set.test_ixor_with_mutation | 0 ...psMutating_Subclass_Set.test_le_with_mutation | 0 ...psMutating_Subclass_Set.test_lt_with_mutation | 0 ...psMutating_Subclass_Set.test_ne_with_mutation | 0 ...psMutating_Subclass_Set.test_or_with_mutation | 0 ...sMutating_Subclass_Set.test_sub_with_mutation | 0 ...sMutating_Subclass_Set.test_xor_with_mutation | 0 ...ting_Subclass_Subclass.test_and_with_mutation | 0 ...ating_Subclass_Subclass.test_eq_with_mutation | 0 ...ating_Subclass_Subclass.test_ge_with_mutation | 0 ...ating_Subclass_Subclass.test_gt_with_mutation | 0 ...ing_Subclass_Subclass.test_iadd_with_mutation | 0 ...ting_Subclass_Subclass.test_ior_with_mutation | 0 
...ing_Subclass_Subclass.test_isub_with_mutation | 0 ...ubclass_Subclass.test_iteration_with_mutation | 0 ...ing_Subclass_Subclass.test_ixor_with_mutation | 0 ...ating_Subclass_Subclass.test_le_with_mutation | 0 ...ating_Subclass_Subclass.test_lt_with_mutation | 0 ...ating_Subclass_Subclass.test_ne_with_mutation | 0 ...ating_Subclass_Subclass.test_or_with_mutation | 0 ...ting_Subclass_Subclass.test_sub_with_mutation | 0 ...ting_Subclass_Subclass.test_xor_with_mutation | 0 ...est_set-TestFrozenSet.test_container_iterator | 0 ...ython313-test_set-TestFrozenSet.test_deepcopy | 0 .../CPython313-test_set-TestFrozenSet.test_gc | 0 ...-TestFrozenSet.test_subclass_with_custom_hash | 0 ...TestFrozenSetSubclass.test_container_iterator | 0 ...-test_set-TestFrozenSetSubclass.test_deepcopy | 0 ...hon313-test_set-TestFrozenSetSubclass.test_gc | 0 ...stFrozenSetSubclass.test_keywords_in_subclass | 0 ...zenSetSubclass.test_subclass_with_custom_hash | 0 ...Set_Dict.test_difference_update_with_mutation | 0 ...tating_Set_Dict.test_difference_with_mutation | 0 ...t_Dict.test_intersection_update_with_mutation | 0 ...ting_Set_Dict.test_intersection_with_mutation | 0 ...tating_Set_Dict.test_isdisjoint_with_mutation | 0 ...Mutating_Set_Dict.test_issubset_with_mutation | 0 ...tating_Set_Dict.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ..._Dict.test_symmetric_difference_with_mutation | 0 ...odsMutating_Set_Dict.test_union_with_mutation | 0 ...dsMutating_Set_Dict.test_update_with_mutation | 0 ...Set_List.test_difference_update_with_mutation | 0 ...tating_Set_List.test_difference_with_mutation | 0 ...t_List.test_intersection_update_with_mutation | 0 ...ting_Set_List.test_intersection_with_mutation | 0 ...tating_Set_List.test_isdisjoint_with_mutation | 0 ...Mutating_Set_List.test_issubset_with_mutation | 0 ...tating_Set_List.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ..._List.test_symmetric_difference_with_mutation | 0 ...odsMutating_Set_List.test_union_with_mutation | 0 ...dsMutating_Set_List.test_update_with_mutation | 0 ..._Set_Set.test_difference_update_with_mutation | 0 ...utating_Set_Set.test_difference_with_mutation | 0 ...et_Set.test_intersection_update_with_mutation | 0 ...ating_Set_Set.test_intersection_with_mutation | 0 ...utating_Set_Set.test_isdisjoint_with_mutation | 0 ...sMutating_Set_Set.test_issubset_with_mutation | 0 ...utating_Set_Set.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...t_Set.test_symmetric_difference_with_mutation | 0 ...hodsMutating_Set_Set.test_union_with_mutation | 0 ...odsMutating_Set_Set.test_update_with_mutation | 0 ...Subclass.test_difference_update_with_mutation | 0 ...ng_Set_Subclass.test_difference_with_mutation | 0 ...bclass.test_intersection_update_with_mutation | 0 ..._Set_Subclass.test_intersection_with_mutation | 0 ...ng_Set_Subclass.test_isdisjoint_with_mutation | 0 ...ting_Set_Subclass.test_issubset_with_mutation | 0 ...ng_Set_Subclass.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...class.test_symmetric_difference_with_mutation | 0 ...utating_Set_Subclass.test_union_with_mutation | 0 ...tating_Set_Subclass.test_update_with_mutation | 0 ...lass_Set.test_difference_update_with_mutation | 0 ...ng_Subclass_Set.test_difference_with_mutation | 0 ...ss_Set.test_intersection_update_with_mutation | 0 ..._Subclass_Set.test_intersection_with_mutation | 0 ...ng_Subclass_Set.test_isdisjoint_with_mutation | 0 
...ting_Subclass_Set.test_issubset_with_mutation | 0 ...ng_Subclass_Set.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...s_Set.test_symmetric_difference_with_mutation | 0 ...utating_Subclass_Set.test_union_with_mutation | 0 ...tating_Subclass_Set.test_update_with_mutation | 0 ...Subclass.test_difference_update_with_mutation | 0 ...bclass_Subclass.test_difference_with_mutation | 0 ...bclass.test_intersection_update_with_mutation | 0 ...lass_Subclass.test_intersection_with_mutation | 0 ...bclass_Subclass.test_isdisjoint_with_mutation | 0 ...Subclass_Subclass.test_issubset_with_mutation | 0 ...bclass_Subclass.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...class.test_symmetric_difference_with_mutation | 0 ...ng_Subclass_Subclass.test_union_with_mutation | 0 ...g_Subclass_Subclass.test_update_with_mutation | 0 ...n313-test_set-TestSet.test_container_iterator | 0 .../CPython313-test_set-TestSet.test_deepcopy | 0 .../CPython313-test_set-TestSet.test_gc | 0 ...CPython313-test_set-TestSet.test_rich_compare | 0 ...st_set-TestSet.test_subclass_with_custom_hash | 0 ...t_set-TestSetSubclass.test_container_iterator | 0 ...hon313-test_set-TestSetSubclass.test_deepcopy | 0 .../CPython313-test_set-TestSetSubclass.test_gc | 0 ...set-TestSetSubclass.test_keywords_in_subclass | 0 ...13-test_set-TestSetSubclass.test_rich_compare | 0 ...estSetSubclass.test_subclass_with_custom_hash | 0 ...-test_set-TestWeirdBugs.test_merge_and_mutate | 0 ...CPython313-test_sort-TestBase.testStressfully | 0 .../CPython313-test_sort-TestBugs.test_bug453523 | 0 ...rateSortUndecorate.test_key_with_mutating_del | 0 ...tOptimizedCompares.test_unsafe_object_compare | 0 ...hon313-test_tuple-TupleTest.test_constructors | 0 ...n313-test_tuple-TupleTest.test_contains_order | 0 ...est_tuple-TupleTest.test_keywords_in_subclass | 0 ...3-test_tuple-TupleTest.test_no_comdat_folding | 0 ...n313-test_tuple-TupleTest.test_track_subtypes | 0 ...CPython313-test_userdict-UserDictTest.test_eq | 0 ...est_userlist-UserListTest.test_contains_order | 0 ...est_with-ExceptionalTestCase.testErrorsInBool | 0 ...-ExceptionalTestCase.testRaisedStopIteration2 | 0 ...with-FailureTestCase.testEnterAttributeError1 | 0 ...with-FailureTestCase.testEnterAttributeError2 | 0 ...t_with-FailureTestCase.testExitAttributeError | 0 .../TestAutograd.test_anomaly_detect_nan | 0 .../TestAutograd.test_autograd_print_tensor | 0 ...eckpointing_without_reentrant_with_context_fn | 0 ...ograd.test_custom_autograd_repeated_grad_grad | 0 .../TestAutograd.test_inplace_not_requires_grad | 0 .../TestAutograd.test_lobpcg | 0 .../TestAutograd.test_mark_non_differentiable | 0 ...estAutograd.test_mark_non_differentiable_none | 0 ...d.test_naughty_autograd_function_stashing_ctx | 0 .../TestAutograd.test_return_leaf_inplace | 0 ...test_const_fold_basic_one_attr_name_collision | 0 ...t_const_fold_basic_one_attr_no_name_collision | 0 .../TestConstFold.test_const_fold_basic_two_attr | 0 ...ld.test_const_fold_basic_two_attr_three_input | 0 ....test_const_fold_has_inlined_call_module_node | 0 .../TestConstFold.test_const_fold_module_attr | 0 ...estConstFold.test_const_fold_submod_hierarchy | 0 ...tConstFold.test_const_fold_unused_placeholder | 0 .../TestConstFold.test_dict_output | 0 .../TestConstFold.test_fold_module | 0 .../TestConstFold.test_three_outputs | 0 .../TestConstFold.test_two_outputs | 0 ...raced.test_cond_merge_graph_preserves_ph_meta | 0 ...ched_branch_output_dynamic_True_backend_eager | 0 
...test_cond_symint_operands_requires_grad_False | 0 ....test_cond_symint_operands_requires_grad_True | 0 ...rolFlowTraced.test_while_loop_autograd_simple | 0 ...nJIT.test_cpp_frontend_module_python_inter_op | 0 ...cpp_frontend_module_python_inter_op_with_cuda | 0 ...not_raised_when_exception_source_is_submodule | 0 ...mericSuiteCoreAPIs.test_user_defined_function | 0 .../TestFlag.test_writeable_any_base | 0 ...estIndexing.test_broken_sequence_not_nd_index | 0 ...ion.test_new_spectral_norm_forward_swap_True} | 0 ...etrizations_and_params_single_param_swap_True | 0 .../TestPrivateUse1.test_backend_type_methods | 0 .../TestPythonDispatch.test_maybe_tuple_bug | 0 .../TestPythonDispatch.test_set_data | 0 ...tch.test_wrapper_subclass_extra_dispatch_keys | 0 ...st_functional_call_member_reference_stateless | 0 ...t_functional_call_member_reference_torch_func | 0 ...lies_module_and_param_specific_decorators_cpu | 0 ...nDeviceTypeCPU.test_ops_composition_names_cpu | 0 ..._applies_op_and_param_specific_decorators_cpu | 0 .../TestTorch.test_as_subclass | 0 .../TestTorch.test_storage_cycle_via_slots | 0 .../TestTorch.test_storage_finalizer_dealloc | 0 .../TestTorch.test_storage_slot_dealloc | 0 .../TestTorch.test_tensor_cycle_via_slots | 0 .../TestTorch.test_tensor_finalizer_dealloc | 0 .../TestTorch.test_tensor_slot_dealloc | 0 ...TestTorchFunctionMode.test_custom_device_type | 0 ...orchFunctionMode.test_disable_enable_subclass | 0 ...tTorchFunctionMode.test_disable_subclass_mode | 0 .../TestTorchFunctionMode.test_factory_override | 0 .../TestTorchFunctionOverride.test_pow_rpow | 0 .../TestAutograd.test_naughty_anomaly_access | 0 test/dynamo_skips/TestPythonPytree.test_key_str | 1 - ...chFunctionMode.test_disable_subclass_not_mode | 0 .../TestTorch.test_storage_cycle_via_slots | 0 .../TestTorch.test_storage_finalizer_dealloc | 0 .../TestTorch.test_storage_slot_dealloc | 0 .../TestTorch.test_tensor_cycle_via_slots | 0 .../TestTorch.test_tensor_finalizer_dealloc | 0 .../TestTorch.test_tensor_slot_dealloc | 0 torch/_dynamo/symbolic_convert.py | 10 +++++++++- 374 files changed, 15 insertions(+), 24 deletions(-) delete mode 100644 test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked delete mode 100644 test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestNamedTuple.test_namedtuple_subclass_issue_24931 delete mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing delete mode 100644 
test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass delete mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push delete mode 100644 test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing delete mode 100644 
test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr delete mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping delete mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion delete mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types delete mode 100644 test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable delete mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index delete mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int delete mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion delete mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types delete mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 delete mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior delete mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence delete mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase delete mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors delete mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order delete mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand delete mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend delete mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding delete mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate 
delete mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil delete mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor delete mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 delete mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress delete mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter delete mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass delete mode 100644 
test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init delete mode 100644 
test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init delete mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation delete mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation 
delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation delete mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation delete mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation delete mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash delete mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate delete mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully delete mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 delete mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del delete mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare delete mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors delete mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order delete mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass delete mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding delete mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes delete mode 100644 test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq delete mode 100644 test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order delete mode 100644 test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool delete mode 100644 test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 delete mode 
100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 delete mode 100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 delete mode 100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_lobpcg delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx delete mode 100644 test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_dict_output delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_fold_module delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_three_outputs delete mode 100644 test/dynamo_expected_failures/TestConstFold.test_two_outputs delete mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta delete mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager delete mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False delete mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True delete mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple delete mode 100644 test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op delete mode 100644 test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda delete mode 100644 test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule delete mode 100644 test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function delete mode 100644 test/dynamo_expected_failures/TestFlag.test_writeable_any_base delete mode 100644 
test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index
rename test/dynamo_expected_failures/{AOTFxirTestCase.test_aoti_fx_const => TestNNParametrization.test_new_spectral_norm_forward_swap_True} (100%)
delete mode 100644 test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True
delete mode 100644 test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods
delete mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug
delete mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_set_data
delete mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys
delete mode 100644 test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless
delete mode 100644 test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func
delete mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu
delete mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu
delete mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu
delete mode 100644 test/dynamo_expected_failures/TestTorch.test_as_subclass
delete mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots
delete mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc
delete mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc
delete mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots
delete mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc
delete mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc
delete mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type
delete mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass
delete mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode
delete mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override
delete mode 100644 test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow
delete mode 100644 test/dynamo_skips/TestAutograd.test_naughty_anomaly_access
delete mode 100644 test/dynamo_skips/TestPythonPytree.test_key_str
delete mode 100644 test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode
delete mode 100644 test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots
delete mode 100644 test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc
delete mode 100644 test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc
delete mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots
delete mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc
delete mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc
diff --git a/test/dynamo/test_error_messages.py b/test/dynamo/test_error_messages.py
index 081ceb5065dfa..847f3a6fd2166 100644
--- a/test/dynamo/test_error_messages.py
+++ b/test/dynamo/test_error_messages.py
@@ -726,14 +726,14 @@ class Foo:
             Unsupported,
             lambda: torch.compile(fn, backend="eager", fullgraph=True)(),
             """\
-Attempted to call function marked as skipped
-  Explanation: Dynamo does not know how to trace the builtin `builtins.__build_class__.` This function is either a Python builtin (e.g. _warnings.warn) or a third-party C/C++ Python extension (perhaps created with pybind).
-  Hint: If it is a Python builtin, please file an issue on GitHub so the PyTorch team can add support for it and see the next case for a workaround.
-  Hint: If it is a third-party C/C++ Python extension, please either wrap it into a PyTorch-understood custom operator (see https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html for more details) or, if it is traceable, use `torch.compiler.allow_in_graph`.
+LOAD_BUILD_CLASS bytecode not supported
+  Explanation: Dynamo does not support tracing classes that are defined in the compiled region.
+  Hint: Move the class definition out of the compiled region.
+  Hint: It may be possible to write Dynamo tracing rules for this code. Please report an issue to PyTorch if you encounter this graph break often and it is causing performance issues.
 
-  Developer debug context: module: builtins, qualname: __build_class__, skip reason: 
+  Developer debug context: 
 
-  For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0007.html
+  For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0075.html
 
 from user code:
    File "test_error_messages.py", line N, in fn
diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
index c9a2a0730b08a..b7fb01be17152 100644
--- a/test/dynamo/test_misc.py
+++ b/test/dynamo/test_misc.py
@@ -12684,22 +12684,6 @@ def f(*args, **kwargs):
         self.assertRaises(Unsupported, f, [])
         self.assertRaises(Unsupported, f, "1 + j")
 
-    def test_compiled_class_graph_break(self):
-        counter = CompileCounter()
-
-        @torch.compile(backend=counter, fullgraph=False)
-        def f(x):
-            x += 1
-
-            class C:
-                pass
-
-            return x.sin()
-
-        x = torch.randn(3)
-        f(x)
-        self.assertEqual(counter.frame_count, 2)
-
 
 class MiscTestsPyTree(torch._inductor.test_case.TestCase):
     @parametrize_pytree_module
diff --git a/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked b/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool b/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable
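The updated expected message above describes the behavior being tested: under fullgraph=True, Dynamo raises Unsupported ("LOAD_BUILD_CLASS bytecode not supported") when a class is defined inside the compiled region, and the suggested workaround is to hoist the class definition out of that region. A minimal, illustrative sketch of both sides follows; the names `fails`, `works`, and `Point` are invented for the example, and exact tracing support depends on the PyTorch build.

import torch
from torch._dynamo.exc import Unsupported

class Point:
    # Defined outside the compiled region, so no LOAD_BUILD_CLASS is traced.
    def __init__(self, x):
        self.x = x

@torch.compile(backend="eager", fullgraph=True)
def fails(x):
    class C:  # class body is built while tracing -> LOAD_BUILD_CLASS graph break
        pass
    return x.sin()

@torch.compile(backend="eager", fullgraph=True)
def works(x):
    return Point(x).x.sin()  # using an already-defined class is expected to trace

x = torch.randn(3)
try:
    fails(x)
except Unsupported as e:
    print("unsupported:", type(e).__name__)
print(works(x))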
b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass b/test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestNamedTuple.test_namedtuple_subclass_issue_24931 b/test/dynamo_expected_failures/CPython313-test_collections-TestNamedTuple.test_namedtuple_subclass_issue_24931 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator b/test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing b/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error b/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter b/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref b/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract b/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots b/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr b/test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order 
b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass b/test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend 
b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil b/test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor b/test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller deleted file mode 100644 index 
e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key 
b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq 
b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation deleted file mode 100644 index 
e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation deleted file mode 100644 index 
e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff 
--git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation deleted file mode 100644 index 
e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation 
deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate b/test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully b/test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 b/test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del b/test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare b/test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors 
b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq b/test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool b/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 b/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan b/test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor b/test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn b/test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn deleted file mode 100644 index 
e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad b/test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad b/test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_lobpcg b/test/dynamo_expected_failures/TestAutograd.test_lobpcg deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable b/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none b/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx b/test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace b/test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node b/test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr b/test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy b/test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder b/test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git 
a/test/dynamo_expected_failures/TestConstFold.test_dict_output b/test/dynamo_expected_failures/TestConstFold.test_dict_output deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_fold_module b/test/dynamo_expected_failures/TestConstFold.test_fold_module deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_three_outputs b/test/dynamo_expected_failures/TestConstFold.test_three_outputs deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestConstFold.test_two_outputs b/test/dynamo_expected_failures/TestConstFold.test_two_outputs deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple b/test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op b/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda b/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule b/test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function b/test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestFlag.test_writeable_any_base b/test/dynamo_expected_failures/TestFlag.test_writeable_any_base deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index 
b/test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/AOTFxirTestCase.test_aoti_fx_const b/test/dynamo_expected_failures/TestNNParametrization.test_new_spectral_norm_forward_swap_True similarity index 100% rename from test/dynamo_expected_failures/AOTFxirTestCase.test_aoti_fx_const rename to test/dynamo_expected_failures/TestNNParametrization.test_new_spectral_norm_forward_swap_True diff --git a/test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True b/test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods b/test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug b/test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestPythonDispatch.test_set_data b/test/dynamo_expected_failures/TestPythonDispatch.test_set_data deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys b/test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless b/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func b/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorch.test_as_subclass b/test/dynamo_expected_failures/TestTorch.test_as_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots 
b/test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc b/test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc b/test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots b/test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc b/test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc b/test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type b/test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass b/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode b/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override b/test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow b/test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_skips/TestAutograd.test_naughty_anomaly_access b/test/dynamo_skips/TestAutograd.test_naughty_anomaly_access deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/dynamo_skips/TestPythonPytree.test_key_str b/test/dynamo_skips/TestPythonPytree.test_key_str deleted file mode 100644 index a8d6b4d65e03c..0000000000000 --- a/test/dynamo_skips/TestPythonPytree.test_key_str +++ /dev/null @@ -1 +0,0 @@ -Passes under python 3.10, fails under 3.13 diff --git a/test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode b/test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots b/test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc b/test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc 
b/test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots b/test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc b/test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc b/test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py
index beebea05a0e3e..4dd1321a5057d 100644
--- a/torch/_dynamo/symbolic_convert.py
+++ b/torch/_dynamo/symbolic_convert.py
@@ -3290,7 +3290,15 @@ def LOAD_ASSERTION_ERROR(self, inst: Instruction) -> None:
         self.push(self.load_builtin_from_argval("AssertionError"))
 
     def LOAD_BUILD_CLASS(self, inst: Instruction) -> None:
-        self.push(self.load_builtin_from_argval("__build_class__"))
+        unimplemented_v2(
+            gb_type="LOAD_BUILD_CLASS bytecode not supported",
+            context="",
+            explanation="Dynamo does not support tracing classes that are defined in the compiled region.",
+            hints=[
+                "Move the class definition out of the compiled region.",
+                *graph_break_hints.SUPPORTABLE,
+            ],
+        )
 
     UNARY_POSITIVE = stack_op(operator.pos)
     UNARY_NEGATIVE = stack_op(operator.neg)
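As a minimal sketch of the user-facing effect of the LOAD_BUILD_CLASS change above (illustrative names only, not code from this patch): defining a class inside a compiled region now surfaces an explicit graph break, and the hinted fix is to move the class definition out of the compiled region.

```python
import torch

def scale_inline(x):
    # The class definition below compiles to LOAD_BUILD_CLASS, which Dynamo
    # now reports as an unsupported bytecode (a graph break) rather than
    # attempting to trace through __build_class__.
    class Scaler:
        def __init__(self, factor):
            self.factor = factor

        def apply(self, t):
            return t * self.factor

    return Scaler(2.0).apply(x)

# With fullgraph=True the graph break is expected to surface as an error;
# without it, Dynamo should fall back to eager for this frame.
# torch.compile(scale_inline, fullgraph=True)(torch.ones(3))


# The hinted workaround: define the class outside the compiled region and
# only construct/use it inside.
class Scaler:
    def __init__(self, factor):
        self.factor = factor

    def apply(self, t):
        return t * self.factor

def scale_outside(x):
    return Scaler(2.0).apply(x)

out = torch.compile(scale_outside)(torch.ones(3))
```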
It replaces https://github.com/bohnstingl/pytorch/pull/6 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162388 Approved by: https://github.com/ydwu4 --- test/functorch/test_control_flow.py | 121 +++++++++++++++++++- torch/_higher_order_ops/associative_scan.py | 40 +++---- 2 files changed, 140 insertions(+), 21 deletions(-) diff --git a/test/functorch/test_control_flow.py b/test/functorch/test_control_flow.py index 68a326f4f35a0..61658692612bc 100644 --- a/test/functorch/test_control_flow.py +++ b/test/functorch/test_control_flow.py @@ -3744,7 +3744,17 @@ def _run_test(self, model, model_fake, inputs, autograd_param=None): if autograd_param is not None and any( par.requires_grad for par in autograd_param ): - self._check_autograd(result, result_exp, autograd_param) + result_flat = pytree.tree_leaves(result) + result_exp_flat = pytree.tree_leaves(result_exp) + exp_grad_mask = [ + True if r.requires_grad else False for r in result_exp_flat + ] + + self._check_autograd( + [r for r, m in zip(result_flat, exp_grad_mask) if m], + [r for r, m in zip(result_exp_flat, exp_grad_mask) if m], + autograd_param, + ) # Return the result of the functions under test for further investigations return result @@ -5066,6 +5076,115 @@ def fct_pointwise(x, y): autograd_param=None if not autograd else (*pytree.tree_leaves(inp),), ) + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + # Skipping the combination of combine_mode=pointwise and compile_mode=compile_dynamic_shape + # as the current implementation does not support lifted arguments + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and ( + params["device"] == torch.device("cpu") + or params["compile_mode"] == "compile_dynamic_shape" + or torch.version.hip + ) + ), + ) + def test_associative_scan_partial_grad( + self, combine_mode, compile_mode, reverse, device + ): + import random + + n_params = 6 + autograds = [] + autograds.append([True, True, True, True, True, True]) + autograds.append([False, False, False, False, False, False]) + autograds.append([False, True, False, False, False, False]) + for _ in range(5): + autograds.append([bool(random.randint(0, 1)) for _ in range(n_params)]) + + def mul2(x, y): + return (*[xv * yv for xv, yv in zip(x, y)],) + + for a_grads in autograds: + inp = tuple( + [ + torch.randn(10, 3, 2, device=device, requires_grad=a_grads[n]) + for n in range(n_params) + ] + ) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": mul2, + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + autograd_param=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of 
combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + # Skipping the combination of combine_mode=pointwise and compile_mode=compile_dynamic_shape + # as the current implementation does not support lifted arguments + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and ( + params["device"] == torch.device("cpu") + or params["compile_mode"] == "compile_dynamic_shape" + or torch.version.hip + ) + ), + ) + def test_associative_scan_partial_grad_no_grad( + self, combine_mode, compile_mode, reverse, device + ): + def mul_single_nograd(x, y): + xy1 = x[0] * y[0] + with torch.no_grad(): + xy2 = x[1] * y[1] + return xy1, xy2 + + inp = tuple( + [torch.randn(10, 3, 2, device=device, requires_grad=True) for n in range(2)] + ) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": mul_single_nograd, + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + autograd_param=inp[0:1], + ) + @unittest.skipIf(not SM70OrLater, "triton") def test_associative_scan_sparse_tensor(self): x = torch.tensor( diff --git a/torch/_higher_order_ops/associative_scan.py b/torch/_higher_order_ops/associative_scan.py index fa59ee244fec1..f8b0e4ab6f74c 100644 --- a/torch/_higher_order_ops/associative_scan.py +++ b/torch/_higher_order_ops/associative_scan.py @@ -493,16 +493,16 @@ def combine_fn(a: torch.Tensor, b: torch.Tensor): Level 0 (Input): xs0 xs1 xs2 xs3 xs4 \ / | | | - \ / | | | - Level 1: ys1 ───────┘ | | - \ / | + \ / | | | + Level 1: ys1 ───────┘ | | + \ / | \ / | - Level 2: ys2 ────────┘ | - \ / - \ / - Level 3: ys3 ────────────┘ - \ - \ + Level 2: ys2 ────────┘ | + \ / + \ / + Level 3: ys3 ────────────┘ + \ + \ Level 4: ys4 @@ -510,17 +510,17 @@ def combine_fn(a: torch.Tensor, b: torch.Tensor): Level 0 (output): g_xs0 g_xs1 g_xs2 g_xs3 g_xs4 - \ / | | | - \ / | | | - Level 1: gl_ys1 ─> g_ys1 ──────┘ | | - \ / | - \ / | - Level 2: gl_ys2 ─> g_ys2 ────────┘ | - \ / - \ / - Level 3: gl_ys3 ─> g_ys3 ───────────┘ - \ - \ + \ / | | | + \ / | | | + Level 1: gl_ys1 ─> g_ys1 ──────┘ | | + \ / | + \ / | + Level 2: gl_ys2 ─> g_ys2 ────────┘ | + \ / + \ / + Level 3: gl_ys3 ─> g_ys3 ────────────┘ + \ + \ Level 4: gl_ys4 ─> g_ys4, where gl_y1 is the gradient of the loss with respect to ys1 and the input of backward. From 0e7ccc09db936d3154b5d70ce4255f2e6065cf98 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 9 Sep 2025 10:44:34 -0700 Subject: [PATCH 011/693] [easy] Don't force copy result of getAllOperatorsFor in init.cpp (#162218) It returns a const reference to a vector. 
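For background, the win here is standard C++: `getAllOperatorsFor` returns a `const` reference to a vector, so assigning its result to a plain `auto` local deduces a value type and copies every element, while `const auto&` simply binds to the existing vector. A minimal standalone sketch of the difference (illustrative only, not the PyTorch code itself):

```cpp
#include <iostream>
#include <vector>

// Stand-in for an accessor that, like getAllOperatorsFor, hands out a const
// reference to a vector it owns internally.
const std::vector<int>& getValues() {
  static const std::vector<int> values{1, 2, 3, 4};
  return values;
}

int main() {
  auto copied = getValues();          // deduces std::vector<int>; copies all elements
  const auto& borrowed = getValues(); // binds to the existing vector; no copy

  std::cout << copied.size() << " " << borrowed.size() << "\n";
  return 0;
}
```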
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162218 Approved by: https://github.com/Skylion007 ghstack dependencies: #161591, #161595, #161633, #161634, #161692, #162219, #162220 --- torch/csrc/jit/python/init.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torch/csrc/jit/python/init.cpp b/torch/csrc/jit/python/init.cpp index a784ba72e6550..c711020697720 100644 --- a/torch/csrc/jit/python/init.cpp +++ b/torch/csrc/jit/python/init.cpp @@ -1696,7 +1696,7 @@ void initJITBindings(PyObject* module) { [](const std::string& op_name, const std::string& overload_name) { try { auto symbol = Symbol::fromQualString(op_name); - auto operations = getAllOperatorsFor(symbol); + const auto& operations = getAllOperatorsFor(symbol); for (const auto& op : operations) { if (op->schema().overload_name() == overload_name) { return op->schema(); @@ -1717,7 +1717,7 @@ void initJITBindings(PyObject* module) { const std::string& overload_name) -> std::optional { try { auto symbol = Symbol::fromQualString(op_name); - auto operations = getAllOperatorsFor(symbol); + const auto& operations = getAllOperatorsFor(symbol); bool allow_numbers_as_tensors = opAllowsNumbersAsTensors(symbol); for (const auto& op : operations) { if (op->schema().overload_name() == overload_name) { From e4174b1fd798f977ba4dae04c01c2936373fd145 Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Tue, 9 Sep 2025 11:56:04 -0700 Subject: [PATCH 012/693] remove gso from collapse_view_helper (#162212) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162212 Approved by: https://github.com/aorenste Co-authored-by: Aaron Orenstein --- test/test_prims.py | 2 +- torch/_prims/__init__.py | 93 ++++++++++++++++++++++++++-------------- torch/_refs/__init__.py | 9 +++- 3 files changed, 68 insertions(+), 36 deletions(-) diff --git a/test/test_prims.py b/test/test_prims.py index f0fb606d1c5b3..58ed8a7dd7589 100644 --- a/test/test_prims.py +++ b/test/test_prims.py @@ -142,7 +142,7 @@ def test_collapse(self, device, dtype): self.assertTrue(view._is_view()) t_discontig = t.transpose(0, 1) - with self.assertRaises(ValueError, msg="no such view exists"): + with self.assertRaises(RuntimeError, msg="Attempting to view a collapsed tensor, but no such view exists!"): view = prims.collapse_view(t_discontig, 0, 2) copy = prims.collapse(t_discontig, 0, 1) diff --git a/torch/_prims/__init__.py b/torch/_prims/__init__.py index bb26bbb508bd6..34f77a2aed2e8 100644 --- a/torch/_prims/__init__.py +++ b/torch/_prims/__init__.py @@ -1384,12 +1384,22 @@ def _collapsed_shape(shape: ShapeType, start: int, end: int) -> tuple[int, ...]: return shape[0:start] + (dim_length,) + shape[end + 1 :] +# If the collapse is invalid or cannot be determined (because of unbacked data) +# then `must_be_valid` determines the behavior: +# None: return None, None. +# str: Do a torch._check() to ensure the collapse is valid and if it isn't +# then fail with the provided string. 
def _collapse_view_helper( - a: TensorLikeType, start: int, end: int + a: TensorLikeType, start: int, end: int, must_be_valid: Optional[str] ) -> tuple[Optional[ShapeType], Optional[StrideType]]: assert isinstance(a, TensorLike) - from torch.fx.experimental.symbolic_shapes import guard_size_oblivious + from torch.fx.experimental.symbolic_shapes import ( + guard_or_false, + guard_or_true, + sym_and, + sym_or, + ) _validate_collapse_args(a, start, end) @@ -1404,52 +1414,69 @@ def _collapse_view_helper( if a.ndim == 0 or (end == start): return shape, strides - length = shape[end] + valid_op = True + if guard_or_false(a.numel() != 0): + for idx in range(end - 1, start - 1, -1): + valid_op = sym_and( + valid_op, + sym_or( + shape[idx] == 1, + shape[idx + 1] == 1, + strides[idx] == strides[idx + 1] * shape[idx + 1], + ), + ) # type: ignore[assignment] + + # early exit if we already know its invalid. + if guard_or_false(valid_op is False): + break + + # for unbacked this become a runtime assertion. + valid_op = sym_or(valid_op, a.numel() == 0) + + if must_be_valid: + torch._check(valid_op, lambda: must_be_valid) + else: + if not guard_or_false(valid_op): + return None, None + + # compute stride stride = strides[end] for idx in range(end - 1, start - 1, -1): - if guard_size_oblivious(shape[idx] == 0) or guard_size_oblivious( - shape[idx + 1] == 0 - ): - length = 0 - stride = 0 - break - - if guard_size_oblivious(shape[idx] == 1): - continue + if shape[idx] != 1: + # TODO with unbacked we should really exclude when shape[idx] == 1 + # something like + # min(stride[end], torch.ite(shape[x]!=1,stride[idx], inf), ...) + stride = min(stride, strides[idx]) - length = length * shape[idx] - if guard_size_oblivious(stride < strides[idx]): - stride = stride - else: - stride = strides[idx] - - if ( - guard_size_oblivious(a.numel() > 0) - and guard_size_oblivious(shape[idx + 1] != 1) - and not guard_size_oblivious( - strides[idx] == strides[idx + 1] * shape[idx + 1] - ) - ): - return None, None + # compute length + length = shape[end] + if guard_or_true(length != 0): + for idx in range(end - 1, start - 1, -1): + if guard_or_false(shape[idx] == 0): + length = 0 + stride = 0 + break + length = length * shape[idx] + else: + stride = 0 new_shape = shape[:start] + (length,) + shape[end + 1 :] new_strides = strides[:start] + (stride,) + strides[end + 1 :] # NOTE: when the input has no elements it's restrided as if it were contiguous - if guard_size_oblivious(a.numel() == 0): + # except for unbacked. + if guard_or_false(a.numel() == 0): new_strides = utils.make_contiguous_strides_for(new_shape) return new_shape, new_strides def _collapse_view_meta(a: TensorLikeType, start: int, end: int) -> TensorLikeType: - new_shape, new_strides = _collapse_view_helper(a, start, end) - - if new_shape is None: - msg = "Attempting to view a collapsed tensor, but no such view exists!" - raise ValueError(msg) - + new_shape, new_strides = _collapse_view_helper( + a, start, end, "Attempting to view a collapsed tensor, but no such view exists!" 
+ ) assert new_strides is not None + assert new_shape is not None return a.as_strided(new_shape, new_strides, a.storage_offset()) diff --git a/torch/_refs/__init__.py b/torch/_refs/__init__.py index 783e440223796..8a418f349de6b 100644 --- a/torch/_refs/__init__.py +++ b/torch/_refs/__init__.py @@ -3132,7 +3132,10 @@ def flatten(a: TensorLikeType, start_dim: int = 0, end_dim: int = -1) -> TensorL # Tries to take a view # TODO: we could look at directing collapse_view to skip its meta function here (unsafe_collapse_view) - new_shape, _new_strides = prims._collapse_view_helper(a, start_dim, end_dim) + # Unbacked semnatics: if validty of in-place flattening is undecided we copy. + new_shape, _new_strides = prims._collapse_view_helper( + a, start_dim, end_dim, must_be_valid=None + ) if new_shape is not None: return prims.collapse_view(a, start_dim, end_dim) @@ -3840,7 +3843,9 @@ def _reshape_view_helper_core_alg( # may return a view of a copy # Checks if collapse can be a view and short-circuits to copying reshape if it can't - new_shape, _new_strides = prims._collapse_view_helper(a_, idx, end) + new_shape, _new_strides = prims._collapse_view_helper( + a_, idx, end, must_be_valid=None + ) if new_shape is None: if allow_copy: return prims.reshape(a, shape) From 5539916fe10ce4f44aab92263ef8641a39df10cf Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Tue, 9 Sep 2025 13:35:02 -0700 Subject: [PATCH 013/693] [dynamo][refactor] Move get_framelocals_idx to a helper (#162519) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162519 Approved by: https://github.com/williamwen42 --- torch/_dynamo/guards.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index be7ff5051f2d5..df683eb6e1ebb 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -234,6 +234,20 @@ ) +def get_framelocals_idx(code: types.CodeType, var_name: str) -> int: + # Refer to index in the frame's localsplus directly. + # NOTE: name order for a code object doesn't change. + # NOTE: we need to find the LAST matching index because <= 3.10 contains + # duplicate names in the case of cells: a name can be both local and cell + # and will take up 2 slots of the frame's localsplus. The correct behavior + # is to refer to the cell, which has a higher index. + framelocals_names_reversed = code_framelocals_names_reversed_cached(code) + framelocals_idx = ( + len(framelocals_names_reversed) - framelocals_names_reversed.index(var_name) - 1 + ) + return framelocals_idx + + class IndentedBufferWithPrefix(IndentedBuffer): def prefix(self) -> str: return "| " * (self._indent * self.tabwidth) @@ -1342,20 +1356,7 @@ def get_guard_manager_from_source(self, source: Source) -> GuardManager: # Use istype instead of isinstance to check for exact type of source. if istype(source, LocalSource): - # Refer to index in the frame's localsplus directly. - # NOTE: name order for a code object doesn't change. - # NOTE: we need to find the LAST matching index because <= 3.10 contains - # duplicate names in the case of cells: a name can be both local and cell - # and will take up 2 slots of the frame's localsplus. The correct behavior - # is to refer to the cell, which has a higher index. 
- framelocals_names_reversed = code_framelocals_names_reversed_cached( - self.f_code - ) - framelocals_idx = ( - len(framelocals_names_reversed) - - framelocals_names_reversed.index(source.local_name) - - 1 - ) + framelocals_idx = get_framelocals_idx(self.f_code, source.local_name) out = root_guard_manager.framelocals_manager( key=(source.local_name, framelocals_idx), source=source_name, From 33589374b6389ebb3854ec57188aa2bd691c71a2 Mon Sep 17 00:00:00 2001 From: Saurabh Mishra Date: Wed, 10 Sep 2025 00:43:03 +0000 Subject: [PATCH 014/693] [DCP] Avoid multiple storage writer resets in async save (#159448) Summary: Avoid multiple storage writer resets in async save. Currently the reset gets called by the async_save method and then again in the save method. In the async path, async_save should only do the staging and the reset should only happen in the synchronous save path. Test Plan: ``` buck test 'fbcode//mode/opt' //aiplatform/modelstore/experimental/DCP/tests:checkpoint_dist_client_test ``` https://www.internalfb.com/intern/testinfra/testrun/15199648841705052 Rollback Plan: Differential Revision: D79230339 Pull Request resolved: https://github.com/pytorch/pytorch/pull/159448 Approved by: https://github.com/meetv18 --- torch/distributed/checkpoint/state_dict_saver.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/torch/distributed/checkpoint/state_dict_saver.py b/torch/distributed/checkpoint/state_dict_saver.py index 9971f19db8174..05175cf400e5a 100644 --- a/torch/distributed/checkpoint/state_dict_saver.py +++ b/torch/distributed/checkpoint/state_dict_saver.py @@ -312,10 +312,6 @@ def async_save( ) ) - storage_writer = cast( - StorageWriter, _storage_setup(storage_writer, checkpoint_id, reader=False) - ) - state_dict = _stateful_to_state_dict(state_dict) @_dcp_method_logger(log_exceptions=True) From 2281d009e5efd7650481243faab116cd78281da3 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 10 Sep 2025 00:44:29 +0000 Subject: [PATCH 015/693] Revert "[ROCm] Add specific compile options for CK SDPA (#161759)" This reverts commit d22d916719eb7daff8455a01d216d65f81899a9e. 
Reverted https://github.com/pytorch/pytorch/pull/161759 on behalf of https://github.com/huydhn due to Sorry for reverting your change but this seems to break internal ROCm jobs ([comment](https://github.com/pytorch/pytorch/pull/161759#issuecomment-3272807726)) --- aten/src/ATen/CMakeLists.txt | 89 +------------------ .../hip/flash_attn/ck/launch_kernel_pt.hpp | 9 +- caffe2/CMakeLists.txt | 4 - third_party/composable_kernel | 2 +- 4 files changed, 10 insertions(+), 94 deletions(-) diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt index bbf79491e2d3d..6c095680733fe 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt @@ -1,7 +1,6 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) - if(NOT MSVC) string(APPEND CMAKE_CXX_FLAGS " -Wno-ignored-qualifiers") string(APPEND CMAKE_C_FLAGS " -Wno-ignored-qualifiers") @@ -196,94 +195,14 @@ if(USE_FLASH_ATTENTION) endif() endif() message(STATUS "USE_ROCM_CK_SDPA is set; building PyTorch with CK SDPA enabled") - - # CK SDPA sources require specific compilation flags - set(CK_SDPA_EXTRA_HIPCC_FLAGS - -fno-autolink - -fhip-new-launch-api - -fgnuc-version=4.2.1 - -fno-implicit-modules - -fskip-odr-check-in-gmf - -fcxx-exceptions - -fexceptions - -fcolor-diagnostics - -faddrsig - -fno-rounding-math - -mconstructor-aliases - -mllvm - -amdgpu-internalize-symbols - -fvisibility=hidden - -Wno-float-equal - -fgpu-flush-denormals-to-zero - -Wno-unused-parameter) - - #TODO: The following flags are specific to 8-bit width types which are not integrated via CK yet. - # Add once that support is integrated - #check_cxx_compiler_flag("-fno-offload-uniform-block" HAS_NO_OFFLOAD_UNIFORM_BLOCK) - #if(HAS_NO_OFFLOAD_UNIFORM_BLOCK) - # list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -fno-offload-uniform-block) - #endif() - #check_cxx_compiler_flag("-mllvm --lsr-drop-solution=1" HAS_LSR_DROP_SOLUTION) - #if(HAS_LSR_DROP_SOLUTION) - # list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm --lsr-drop-solution=1) - #endif() - #check_cxx_compiler_flag("-mllvm -enable-post-misched=0" HAS_ENABLE_POST_MISCHED) - #if(HAS_ENABLE_POST_MISCHED) - # list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm -enable-post-misched=0) - #endif() - #set(check-coerce) - #check_cxx_compiler_flag(" -mllvm -amdgpu-coerce-illegal-types=1" check-coerce) - #if(check-coerce) - # list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm -amdgpu-coerce-illegal-types=1) - #endif() - - list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm -amdgpu-early-inline-all=true) - list(APPEND CK_SDPA_EXTRA_HIPCC_FLAGS -mllvm -amdgpu-function-calls=false) - - # Additional CK compiler flags - set(CK_SDPA_EXTRA_HIPCC_OPTIONS - CK_ENABLE_BF16 - CK_ENABLE_BF8 - CK_ENABLE_FP16 - CK_ENABLE_FP32 - CK_ENABLE_FP64 - CK_ENABLE_FP8 - CK_ENABLE_INT8 - CK_USE_FNUZ_FP8 - CK_USE_GFX94 - CK_USE_XDL - __HIP_PLATFORM_AMD__=1 - __HIP_PLATFORM_HCC__=1 - CK_TILE_FMHA_FWD_FAST_EXP2=1 - CK_TILE_FMHA_FWD_SPLITKV_API=1 - CK_TILE_FMHA_FWD_APPENDKV_API=1 - CK_TILE_FMHA_FWD_PAGEDKV_API=1 - __GCC_HAVE_DWARF2_CFI_ASM=1 - USE_ROCM_CK_SDPA) - message(STATUS "Generating CK kernel instances...") add_subdirectory(native/transformers/hip/flash_attn/ck) + file(GLOB flash_attention_hip_ck_hip "native/transformers/hip/flash_attn/ck/*.hip") + list(APPEND native_transformers_hip_hip ${flash_attention_hip_ck_hip}) # FAv3 Generation add_subdirectory(native/transformers/hip/flash_attn/ck/fav_v3) - file(GLOB ck_sdpa_sources_hip - "native/transformers/hip/flash_attn/ck/*.hip" - 
"native/transformers/hip/flash_attn/ck/fav_v3/*.hip") - - set_source_files_properties(${ck_sdpa_sources_hip} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) - hip_add_library(ck_sdpa STATIC - ${ck_sdpa_sources_hip} - HIPCC_OPTIONS ${HIP_HCC_FLAGS} ${CK_SDPA_EXTRA_HIPCC_FLAGS}) - set_target_properties(ck_sdpa PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(ck_sdpa PUBLIC ${CK_SDPA_EXTRA_HIPCC_OPTIONS}) - target_include_directories(ck_sdpa PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/include - ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/library/include - ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/composable_kernel/example/ck_tile/01_fmha - ${CMAKE_CURRENT_BINARY_DIR}/composable_kernel - ${CMAKE_CURRENT_SOURCE_DIR}/../../../third_party/aiter/csrc/include - ${CMAKE_CURRENT_SOURCE_DIR}/native/transformers/hip/flash_attn/ck - ) - + file(GLOB flash_attention_v3_hip "native/transformers/hip/flash_attn/ck/fav_v3/*.hip") + list(APPEND native_transformers_hip_hip ${flash_attention_v3_hip}) endif() file(GLOB flash_attention_hip_aot_hip "native/transformers/hip/flash_attn/aot/*.hip") file(GLOB flash_attention_src_hip_hip "native/transformers/hip/flash_attn/src/*.hip") diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/ck/launch_kernel_pt.hpp b/aten/src/ATen/native/transformers/hip/flash_attn/ck/launch_kernel_pt.hpp index f4e1ef71f5a98..400da17426f1d 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/ck/launch_kernel_pt.hpp +++ b/aten/src/ATen/native/transformers/hip/flash_attn/ck/launch_kernel_pt.hpp @@ -8,9 +8,9 @@ namespace ck_tile { // Added by hipification to become a no-op on non supported architectures -template +template #if CK_TILE_USE_LAUNCH_BOUNDS -__launch_bounds__(Kernel::kBlockSize, MinBlockPerCu) +__launch_bounds__(MaxThreadPerBlock, MinBlockPerCu) #endif __global__ void kentry_pt(Args... args) { @@ -29,13 +29,14 @@ __launch_bounds__(Kernel::kBlockSize, MinBlockPerCu) // // the "static __device__ operator()(some_arg)" is the entry point of KernelImpl // -template CK_TILE_HOST auto make_kernel_pt(KernelImpl /*f*/, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... 
args) { - const auto kernel = kentry_pt; + const auto kernel = kentry_pt; return [=](const stream_config& s) { kernel<<>>(args...); diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 9c75baa0bf947..4cd773bc16123 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1762,10 +1762,6 @@ if(USE_ROCM) target_link_libraries(torch_hip PUBLIC torch_cpu_library ${Caffe2_PUBLIC_HIP_DEPENDENCY_LIBS}) target_link_libraries(torch_hip PRIVATE ${Caffe2_HIP_DEPENDENCY_LIBS}) - if(USE_ROCM_CK_SDPA) - target_link_libraries(torch_hip PRIVATE ck_sdpa) - endif() - if(USE_FBGEMM_GENAI) if(USE_ROCM) target_link_libraries(torch_hip PRIVATE fbgemm_genai) diff --git a/third_party/composable_kernel b/third_party/composable_kernel index de61e55493826..7fe50dc3da206 160000 --- a/third_party/composable_kernel +++ b/third_party/composable_kernel @@ -1 +1 @@ -Subproject commit de61e554938265a5d17a1bba8c148457125e80cd +Subproject commit 7fe50dc3da2069d6645d9deb8c017a876472a977 From e60ad4f628725ebd7c74af0202edebf631a6e10b Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Mon, 8 Sep 2025 23:11:59 -0700 Subject: [PATCH 016/693] [DTensor] fix copy_ strategy to support linearity (#162460) Fixing issue introduced in https://github.com/pytorch/pytorch/pull/158538 where `aten.copy_.default` is registered as a pointwise op, but without linearity. In particular, when both `src` and `dst` tensors have same `Partial` placements, direct copy should happen without redistribute, instead of redistributing both to `Replicate` before making the copy. This was discovered from silent incorrect results e.g. on `torch.einsum` backward. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162460 Approved by: https://github.com/zpcore --- test/distributed/tensor/test_tensor_ops.py | 13 +++++++++++++ torch/distributed/tensor/_ops/_pointwise_ops.py | 7 ++++--- torch/distributed/tensor/_ops/_tensor_ops.py | 17 ----------------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/test/distributed/tensor/test_tensor_ops.py b/test/distributed/tensor/test_tensor_ops.py index 0e75748be8a31..b41e8f53b1369 100644 --- a/test/distributed/tensor/test_tensor_ops.py +++ b/test/distributed/tensor/test_tensor_ops.py @@ -93,6 +93,19 @@ def test_copy_(self): dst_tensor.copy_(src_tensor) self.assertEqual(dst_dtensor.full_tensor(), dst_tensor) + # as a pointwise op, need to keep Partial placements without redistribute + src_tensor = torch.randn((64, 1)) + dst_tensor = torch.zeros(16, 32, 64, 128) + src_specs = [[Partial()]] + dst_specs = [[Partial()]] + for dst_spec, src_spec in zip(dst_specs, src_specs): + src_dtensor = DTensor.from_local(src_tensor, device_mesh, src_spec) + dst_dtensor = DTensor.from_local(dst_tensor, device_mesh, dst_spec) + dst_dtensor.copy_(src_dtensor) + dst_tensor.copy_(src_tensor) + self.assertEqual(dst_dtensor.placements, (Partial(),)) + self.assertEqual(dst_dtensor._local_tensor, dst_tensor) + @with_comms def test_contiguous(self): device_mesh = self.build_device_mesh() diff --git a/torch/distributed/tensor/_ops/_pointwise_ops.py b/torch/distributed/tensor/_ops/_pointwise_ops.py index 46fc8fbc0d990..42964ff748972 100644 --- a/torch/distributed/tensor/_ops/_pointwise_ops.py +++ b/torch/distributed/tensor/_ops/_pointwise_ops.py @@ -421,6 +421,7 @@ aten.mul_.Scalar: 0, aten.mul.Tensor: 2, aten.mul_.Tensor: 2, + aten.copy_.default: 1, } @@ -748,9 +749,9 @@ def args_tuple_strategies( args_schema, child_strtgy, linearity, - scalar_tensor_idx=_FUSED_OP_SCALAR_IDX - if op_schema.op in fused_ops - 
else None, + scalar_tensor_idx=( + _FUSED_OP_SCALAR_IDX if op_schema.op in fused_ops else None + ), ) list_strategy.append(pointwise_strategy) return TupleStrategy(list_strategy) diff --git a/torch/distributed/tensor/_ops/_tensor_ops.py b/torch/distributed/tensor/_ops/_tensor_ops.py index a5a037a3c73e6..0e62b817477c7 100644 --- a/torch/distributed/tensor/_ops/_tensor_ops.py +++ b/torch/distributed/tensor/_ops/_tensor_ops.py @@ -35,8 +35,6 @@ Shard, ) -from ._pointwise_ops import pointwise_strategy - aten = torch.ops.aten @@ -93,21 +91,6 @@ def propagate_single_input_strategy(op_schema: OpSchema) -> StrategyType: aten._to_copy.default, schema_info=RuntimeSchemaInfo(static_kwargkey=["dtype"]) )(propagate_single_input_strategy) -# copy_ is actually a pointwise op with broadcasting, so reuse the pointwise strategy, which takes care of these -# requirements. -# -# Following torch broadcasting semantics (https://docs.pytorch.org/docs/stable/notes/broadcasting.html) -# - self can not change shape as a result of broadcasting since this is an inplace op -# - src can broadcast, but when it does it always does so from the trailing end -# e.g. the last dim of 'src' must match up with the last dim of 'self' -# -# DTensor semantics for inplace ops also dictates that we may NOT redistribute our 'self' input. -# In practice, what this means is -# - our output strategies should map 1:1 to our 'self' input strategies -# - our 'src' input may be redistributed to match up with the 'self' input, with the caveat of adjusting for -# broadcasting dim -register_op_strategy(aten.copy_.default)(pointwise_strategy) - @register_op_strategy( [ From 878f59ef757d44c2285f9a952e2426118bd5c205 Mon Sep 17 00:00:00 2001 From: Tristan Rice Date: Wed, 10 Sep 2025 01:18:24 +0000 Subject: [PATCH 017/693] DeviceMesh: support _rank for use with non-global PGs (#162439) Summary: This adds a `_rank` field to DeviceMesh init that allows for instantiating a DeviceMesh without depending on `dist.get_rank()` which requires a global PG to be instantiated. 
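For illustration, a minimal sketch of the new argument, mirroring the test added below. The `_`-prefixed arguments are experimental/internal, and this assumes a PyTorch build with distributed support:

```python
import torch
from torch.distributed.device_mesh import DeviceMesh

# Build a 1-D mesh over ranks 0..9. _init_backend=False skips process-group
# creation, and _rank supplies the current rank explicitly, so per this change
# dist.get_rank() (and therefore a global PG) is not needed to place this
# process on the mesh.
mesh = DeviceMesh("cpu", torch.arange(10), _init_backend=False, _rank=5)

print(mesh.get_coordinate())  # [5] -- this rank's coordinate on the mesh
# mesh.get_group() would still raise, since no process groups were initialized.
```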
Test Plan: ``` buck2 test mode/opt -c fbcode.enable_gpu_sections=true //caffe2/test/distributed:device_mesh -- init_backend ``` Rollback Plan: Differential Revision: D81981777 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162439 Approved by: https://github.com/kwen2501, https://github.com/fduwjj --- test/distributed/test_device_mesh.py | 6 ++++-- torch/distributed/device_mesh.py | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/test/distributed/test_device_mesh.py b/test/distributed/test_device_mesh.py index 5672171d0be4d..693a63bd5d4ed 100644 --- a/test/distributed/test_device_mesh.py +++ b/test/distributed/test_device_mesh.py @@ -246,14 +246,16 @@ def test_device_mesh_2d(self): @with_comms def test_device_mesh_init_backend(self): - mesh = DeviceMesh(self.device_type, [1], _init_backend=False) + mesh = DeviceMesh( + self.device_type, torch.arange(10), _init_backend=False, _rank=5 + ) with self.assertRaisesRegex(RuntimeError, "process groups not initialized!"): mesh.get_group() # coordinates should always been populated when init_backend is False, as whenever # we call init_backend we should make sure the default pg already created - mesh.get_coordinate() + self.assertEqual(mesh.get_coordinate(), [5]) def test_fake_pg_device_mesh(self): fake_store = FakeStore() diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py index 904d1f84100cc..6564acc303812 100644 --- a/torch/distributed/device_mesh.py +++ b/torch/distributed/device_mesh.py @@ -396,6 +396,9 @@ class DeviceMesh: device_type (str): The device type of the mesh. Currently supports: "cpu", "cuda/cuda-like". mesh (ndarray): A multi-dimensional array or an integer tensor describing the layout of devices, where the IDs are global IDs of the default process group. + _rank (int): (experimental/internal) + The global rank of the current process. If not provided, it will + be inferred from the default process group. Returns: DeviceMesh: A :class:`DeviceMesh` object representing the device layout. @@ -430,6 +433,7 @@ def __init__( tuple[tuple[Optional[str], Optional[C10dBackend.Options]], ...] ] = None, _init_backend: bool = True, + _rank: Optional[int] = None, ) -> None: self.device_type = device_type if isinstance(mesh, torch.Tensor) and mesh.device.type != "cpu": @@ -460,8 +464,11 @@ def __init__( if is_initialized() and get_backend() == "threaded": self._thread_id = threading.get_ident() + if _rank is None: + _rank = get_rank() + # calculate the coordinates of the current global rank on the mesh - rank_coords = (self.mesh == get_rank()).nonzero() + rank_coords = (self.mesh == _rank).nonzero() assert rank_coords.size(0) in (0, 1) self._coordinate_on_dim: Optional[list[int]] = ( rank_coords[0].tolist() if rank_coords.size(0) > 0 else None From c66e58b7d0d8744e3042fca62131c7edabe13474 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Wed, 10 Sep 2025 01:40:20 +0000 Subject: [PATCH 018/693] [ONNX] Expose the testing module (#162495) * Created a new module `torch/onnx/testing.py` that exposes the `assert_onnx_program` function for testing exported ONNX models. * Updated the ONNX documentation (`docs/source/onnx.md`) to include `onnx_testing` in the list of relevant modules. 
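A short usage sketch of the newly exposed helper (assuming `onnxruntime` is installed; the model is a made-up example):

```python
import torch
import torch.onnx.testing


class MLP(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(4, 2)

    def forward(self, x):
        return torch.relu(self.fc(x))


# The dynamo-based exporter returns an ONNXProgram.
onnx_program = torch.onnx.export(MLP(), (torch.randn(1, 4),), dynamo=True)

# Public entry point added by this PR: runs the exported model on the captured
# example inputs and checks its outputs against eager PyTorch.
torch.onnx.testing.assert_onnx_program(onnx_program)
```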
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162495 Approved by: https://github.com/titaiwangms, https://github.com/xadupre --- docs/source/onnx.md | 1 + docs/source/onnx_testing.md | 9 +++++++++ torch/onnx/testing.py | 8 ++++++++ 3 files changed, 18 insertions(+) create mode 100644 docs/source/onnx_testing.md create mode 100644 torch/onnx/testing.py diff --git a/docs/source/onnx.md b/docs/source/onnx.md index 73a24b671553c..8310b2aa71302 100644 --- a/docs/source/onnx.md +++ b/docs/source/onnx.md @@ -102,6 +102,7 @@ also be interested in reading our [development wiki](https://github.com/pytorch/ onnx_export onnx_ops onnx_verification + onnx_testing ``` ### Deprecated APIs diff --git a/docs/source/onnx_testing.md b/docs/source/onnx_testing.md new file mode 100644 index 0000000000000..d8da35ff08922 --- /dev/null +++ b/docs/source/onnx_testing.md @@ -0,0 +1,9 @@ +# torch.onnx.testing + +```{eval-rst} +.. automodule:: torch.onnx.testing +``` + +```{eval-rst} +.. autofunction:: torch.onnx.testing.assert_onnx_program +``` diff --git a/torch/onnx/testing.py b/torch/onnx/testing.py new file mode 100644 index 0000000000000..aa168b32746f5 --- /dev/null +++ b/torch/onnx/testing.py @@ -0,0 +1,8 @@ +"""Utilities to aid in testing exported ONNX models.""" + +__all__ = ["assert_onnx_program"] + +from torch.onnx._internal.exporter._testing import assert_onnx_program + + +assert_onnx_program.__module__ = "torch.onnx.testing" From dc4f97e9c18959c4328fd597311d0ae0d9e9461f Mon Sep 17 00:00:00 2001 From: Yu Guo Date: Wed, 10 Sep 2025 01:53:26 +0000 Subject: [PATCH 019/693] [triton] enable int64 indexing in convolution and mm template (#162506) Summary: hitting illegal memory access issue when compiling conv and addmm kernels with the change in https://github.com/pytorch/pytorch/pull/157767 Differential Revision: D81995664 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162506 Approved by: https://github.com/iseeyuan --- torch/_inductor/kernel/bmm.py | 6 +++--- torch/_inductor/kernel/conv.py | 12 ++++++------ torch/_inductor/kernel/mm.py | 8 ++++---- torch/_inductor/kernel/mm_grouped.py | 2 +- torch/_inductor/kernel/mm_plus_mm.py | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/torch/_inductor/kernel/bmm.py b/torch/_inductor/kernel/bmm.py index e882be6df0df8..6c468e7da0280 100644 --- a/torch/_inductor/kernel/bmm.py +++ b/torch/_inductor/kernel/bmm.py @@ -56,7 +56,7 @@ def bmm_grid(b, m, n, meta, *, cdiv): stride_bn = {{stride("B", 2)}} # based on triton.ops.matmul - pid = tl.program_id(0) + pid = tl.program_id(0).to(INDEX_DTYPE) grid_m = (M + BLOCK_M - 1) // BLOCK_M grid_n = (N + BLOCK_N - 1) // BLOCK_N @@ -82,7 +82,7 @@ def bmm_grid(b, m, n, meta, *, cdiv): rk = tl.arange(0, BLOCK_K) - idx_q = tl.program_id(1) # batch dimension for BMM + idx_q = tl.program_id(1).to(INDEX_DTYPE) # batch dimension for BMM A = A + (ram[:, None] * stride_am + rk[None, :] * stride_ak + idx_q*stride_aq) B = B + (rk[:, None] * stride_bk + rbn[None, :] * stride_bn + idx_q*stride_bq) @@ -101,7 +101,7 @@ def bmm_grid(b, m, n, meta, *, cdiv): # rematerialize rm and rn to save registers rm = pid_m * BLOCK_M + tl.arange(0, BLOCK_M) rn = pid_n * BLOCK_N + tl.arange(0, BLOCK_N) - idx_q = tl.program_id(1) # batch dimension for BMM + idx_q = tl.program_id(1).to(INDEX_DTYPE) # batch dimension for BMM idx_m = rm[:, None] idx_n = rn[None, :] mask = (idx_m < M) & (idx_n < N) diff --git a/torch/_inductor/kernel/conv.py b/torch/_inductor/kernel/conv.py index 6b9e9a1a32e7f..c929299cc7951 100644 
--- a/torch/_inductor/kernel/conv.py +++ b/torch/_inductor/kernel/conv.py @@ -117,19 +117,19 @@ def conv3d_grid(n, c, d, h, w, meta, *, cdiv): stride_wh = {{stride("W", 2)}} stride_ww = {{stride("W", 3)}} - nhw = tl.program_id(0) * BLOCK_M + tl.arange(0, BLOCK_M) + nhw = tl.program_id(0).to(INDEX_DTYPE) * BLOCK_M + tl.arange(0, BLOCK_M) idx_y_w = nhw % OUT_W nh = nhw // OUT_W idx_y_h = nh % OUT_H idx_n = nh // OUT_H - idx_y_c = tl.program_id(1) * BLOCK_N + tl.arange(0, BLOCK_N) + idx_y_c = tl.program_id(1).to(INDEX_DTYPE) * BLOCK_N + tl.arange(0, BLOCK_N) {% if GROUPS == 1 %} group = 0 GROUP_IN_C = IN_C GROUP_OUT_C = OUT_C {% else %} - group = tl.program_id(2) + group = tl.program_id(2).to(INDEX_DTYPE) GROUP_IN_C = IN_C // GROUPS GROUP_OUT_C = OUT_C // GROUPS {% endif %} @@ -245,21 +245,21 @@ def conv3d_grid(n, c, d, h, w, meta, *, cdiv): stride_wh = {{stride("W", 3)}} stride_ww = {{stride("W", 4)}} - ndhw = tl.program_id(0) * BLOCK_M + tl.arange(0, BLOCK_M) + ndhw = tl.program_id(0).to(INDEX_DTYPE) * BLOCK_M + tl.arange(0, BLOCK_M) idx_y_w = ndhw % OUT_W ndh = ndhw // OUT_W idx_y_h = ndh % OUT_H nd = ndh // OUT_H idx_y_d = nd % OUT_D idx_n = nd // OUT_D - idx_y_c = tl.program_id(1) * BLOCK_N + tl.arange(0, BLOCK_N) + idx_y_c = tl.program_id(1).to(INDEX_DTYPE) * BLOCK_N + tl.arange(0, BLOCK_N) {% if GROUPS == 1 %} group = 0 GROUP_IN_C = IN_C GROUP_OUT_C = OUT_C {% else %} - group = tl.program_id(2) + group = tl.program_id(2).to(INDEX_DTYPE) GROUP_IN_C = IN_C // GROUPS GROUP_OUT_C = OUT_C // GROUPS {% endif %} diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 155c461775cbc..784744dba9917 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -77,7 +77,7 @@ stride_bn = {{stride("B", 1)}} # based on triton.ops.matmul - pid = tl.program_id(0) + pid = tl.program_id(0).to(INDEX_DTYPE) grid_m = (M + BLOCK_M - 1) // BLOCK_M grid_n = (N + BLOCK_N - 1) // BLOCK_N @@ -153,7 +153,7 @@ stride_bn = {{stride("B", 1)}} # based on triton.ops.matmul - pid = tl.program_id(0) + pid = tl.program_id(0).to(INDEX_DTYPE) grid_m = (M + BLOCK_M - 1) // BLOCK_M grid_n = (N + BLOCK_N - 1) // BLOCK_N @@ -227,7 +227,7 @@ # early exit due to zero-size input(s) return - start_pid = tl.program_id(0) + start_pid = tl.program_id(0).to(INDEX_DTYPE) grid_m = tl.cdiv(M, BLOCK_M) grid_n = tl.cdiv(N, BLOCK_N) k_tiles = tl.cdiv(K, BLOCK_K) @@ -419,7 +419,7 @@ def apply_scaling( stride_a_scale_m = 0 stride_b_scale_n = 0 - start_pid = tl.program_id(axis=0) + start_pid = tl.program_id(axis=0).to(INDEX_DTYPE) num_pid_m = tl.cdiv(M, BLOCK_M) num_pid_n = tl.cdiv(N, BLOCK_N) k_tiles = tl.cdiv(K, BLOCK_K) diff --git a/torch/_inductor/kernel/mm_grouped.py b/torch/_inductor/kernel/mm_grouped.py index 3424585e1214c..6508146fa49af 100644 --- a/torch/_inductor/kernel/mm_grouped.py +++ b/torch/_inductor/kernel/mm_grouped.py @@ -135,7 +135,7 @@ def early_config_prune(g, m, configs, named_args): {{def_kernel("a_ptr", "b_ptr")}} {%- endif %} {%- endif %} - tidx = tl.program_id(0) + tidx = tl.program_id(0).to(INDEX_DTYPE) {%- set M_IS_VARYING = A_IS_2D and not B_IS_2D %} {%- set N_IS_VARYING = not A_IS_2D and B_IS_2D %} diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index 2133931815949..a7497b6d684af 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -51,7 +51,7 @@ stride_dn = {{stride("D", 1)}} # based on triton.ops.matmul - pid = tl.program_id(0) + pid = tl.program_id(0).to(INDEX_DTYPE) grid_m = (M + BLOCK_M 
- 1) // BLOCK_M grid_n = (N + BLOCK_N - 1) // BLOCK_N From 760c478a14dedd224258cc57713d44d4bc90cb55 Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Wed, 10 Sep 2025 02:03:45 +0000 Subject: [PATCH 020/693] [FlexAttn][Minor] Update FlexConfig doc (#162533) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162533 Approved by: https://github.com/drisspg --- torch/_inductor/template_heuristics/triton.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch/_inductor/template_heuristics/triton.py b/torch/_inductor/template_heuristics/triton.py index 0aaf70ae3f24d..f2756a5ee515a 100644 --- a/torch/_inductor/template_heuristics/triton.py +++ b/torch/_inductor/template_heuristics/triton.py @@ -74,7 +74,8 @@ class GemmConfig(BaseConfig): class FlexConfig: """ Base Config class for flex attention - - FlexAttn forward, backward and flex decode will use this + - FlexAttn forward and backward will use this. For flex decoding, + please use FlexDecodingConfig. NOTE: For flex_attn bwd block_m and block_n are reused for block_m1, block_m2, block_n1, block_n2 From 484c4093a87a3e6767e55ed553f95db8fc137442 Mon Sep 17 00:00:00 2001 From: angelayi Date: Tue, 9 Sep 2025 09:37:04 -0700 Subject: [PATCH 021/693] test fixing benchmarks (#162503) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162503 Approved by: https://github.com/huydhn ghstack dependencies: #160741 --- .ci/pytorch/macos-test.sh | 4 ++-- benchmarks/dynamo/common.py | 10 ++++++++-- .../inductor/aoti_package/model_package_loader.cpp | 11 +++++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index 79d47da431712..1ed5b038fd991 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -393,10 +393,10 @@ elif [[ $TEST_CONFIG == *"perf_hf"* ]]; then test_hf_perf elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then test_timm_perf -elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then - test_torchbench_smoketest "${SHARD_NUMBER}" elif [[ $TEST_CONFIG == *"aot_inductor_perf_smoketest"* ]]; then test_aoti_torchbench_smoketest "${SHARD_NUMBER}" +elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then + test_torchbench_smoketest "${SHARD_NUMBER}" elif [[ $TEST_CONFIG == *"mps"* ]]; then test_python_mps elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py index 2901009f7c4d1..83d55682247e7 100644 --- a/benchmarks/dynamo/common.py +++ b/benchmarks/dynamo/common.py @@ -1424,7 +1424,7 @@ def load(cls, model, example_inputs, mode): torch.hpu.max_memory_allocated() - pre_clone_memory_used ) / 1e9 - inductor_configs = {} + inductor_configs = {"aot_inductor.package_constants_in_so": False} if mode == "max-autotune": inductor_configs["max_autotune"] = True ep = torch.export.export( @@ -1439,8 +1439,14 @@ def load(cls, model, example_inputs, mode): ep, inductor_configs=inductor_configs ) # type: ignore[arg-type] + compiled = torch._inductor.aoti_load_package(package_path) + compiled.load_constants( + {**ep.state_dict, **ep.constants}, + check_full_update=False, + user_managed=True, + ) cls.cache[key] = ( - torch._inductor.aoti_load_package(package_path), + compiled, clone_memory_used, ) diff --git a/torch/csrc/inductor/aoti_package/model_package_loader.cpp b/torch/csrc/inductor/aoti_package/model_package_loader.cpp index aa8ef905d57aa..1fae20572b923 100644 --- a/torch/csrc/inductor/aoti_package/model_package_loader.cpp +++ b/torch/csrc/inductor/aoti_package/model_package_loader.cpp @@ 
-721,8 +721,15 @@ void AOTIModelPackageLoader::load_constants( for (const auto& it : constants_map) { if (fqn_to_constant_name.find(it.first) != fqn_to_constant_name.end()) { updated_constants_map.emplace(fqn_to_constant_name[it.first], it.second); - } else { - throw std::runtime_error("Constant not found: " + it.first); + } else if (check_full_update) { + std::string constant_fqns = ""; + for (const auto& it2 : fqn_to_constant_name) { + constant_fqns += it2.first + ", "; + } + throw std::runtime_error( + "The constant with FQN " + it.first + + " was not found in the model. The available constants are: " + + constant_fqns); } } From 00985970e312c3c5e674e8e14d39fe77c226600e Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 10 Sep 2025 03:56:22 +0000 Subject: [PATCH 022/693] Put torchao (0.13.0) back to benchmark workflow (#162227) 0.13.0 was released on Sep 3rd https://pypi.org/project/torchao/#history, which should have fixed the crashing issue on transformers now Pull Request resolved: https://github.com/pytorch/pytorch/pull/162227 Approved by: https://github.com/malfet --- .ci/docker/common/install_inductor_benchmark_deps.sh | 2 +- .ci/pytorch/macos-test.sh | 3 --- .github/ci_commit_pins/torchao.txt | 2 +- .github/workflows/inductor-perf-test-nightly-h100.yml | 3 --- .github/workflows/inductor-periodic.yml | 4 ++++ .github/workflows/inductor.yml | 2 ++ 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.ci/docker/common/install_inductor_benchmark_deps.sh b/.ci/docker/common/install_inductor_benchmark_deps.sh index 81467d87f5140..19159b50ce37d 100644 --- a/.ci/docker/common/install_inductor_benchmark_deps.sh +++ b/.ci/docker/common/install_inductor_benchmark_deps.sh @@ -43,4 +43,4 @@ install_huggingface install_timm # Clean up -conda_run pip uninstall -y torch torchvision torchaudio triton torchao +conda_run pip uninstall -y torch torchvision torchaudio triton diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index 1ed5b038fd991..42a536a555bd6 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -181,9 +181,6 @@ checkout_install_torchbench() { popd pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt - # https://github.com/pytorch/pytorch/issues/160689 to remove torchao because - # its current version 0.12.0 doesn't work with transformers 4.54.0 - pip uninstall -y torchao echo "Print all dependencies after TorchBench is installed" python -mpip freeze diff --git a/.github/ci_commit_pins/torchao.txt b/.github/ci_commit_pins/torchao.txt index d12c20e6a117f..c40e3a82e615f 100644 --- a/.github/ci_commit_pins/torchao.txt +++ b/.github/ci_commit_pins/torchao.txt @@ -1 +1 @@ -51c87b6ead6b7e098ada95d6a7609ee873b854cf +f32431e593d0e9db86c502d3872dd67ee40a005f diff --git a/.github/workflows/inductor-perf-test-nightly-h100.yml b/.github/workflows/inductor-perf-test-nightly-h100.yml index 41210f89c9a89..7e363df9f8a86 100644 --- a/.github/workflows/inductor-perf-test-nightly-h100.yml +++ b/.github/workflows/inductor-perf-test-nightly-h100.yml @@ -137,7 +137,6 @@ jobs: docker-image: ${{ needs.build.outputs.docker-image }} test-matrix: ${{ needs.build.outputs.test-matrix }} timeout-minutes: 720 - # disable monitor in perf tests, next step is to enable it disable-monitor: false monitor-log-interval: 15 monitor-data-collect-interval: 4 @@ -154,7 +153,6 @@ jobs: docker-image: ${{ needs.build.outputs.docker-image }} test-matrix: ${{ needs.build.outputs.test-matrix }} timeout-minutes: 1440 - # disable monitor in perf tests, next step is to 
enable it disable-monitor: false monitor-log-interval: 15 monitor-data-collect-interval: 4 @@ -173,7 +171,6 @@ jobs: docker-image: ${{ needs.build.outputs.docker-image }} test-matrix: ${{ needs.build.outputs.test-matrix }} timeout-minutes: 720 - # disable monitor in perf tests for more investigation disable-monitor: false monitor-log-interval: 15 monitor-data-collect-interval: 4 diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml index 21d965eaeaada..a5b05d0d358c2 100644 --- a/.github/workflows/inductor-periodic.yml +++ b/.github/workflows/inductor-periodic.yml @@ -36,6 +36,8 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: + # More memory is needed to build torchao + runner: linux.2xlarge.memory runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks @@ -128,6 +130,8 @@ jobs: needs: - get-default-label-prefix with: + # More memory is needed to build torchao + runner: linux.2xlarge.memory runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 4189d24a7b14f..a1367991e6c6d 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -49,6 +49,8 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: + # More memory is needed to build torchao + runner: linux.2xlarge.memory build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.6' From e64965300aaa6f23e26fa8a1936362b9da8c61c8 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 10 Sep 2025 04:02:34 +0000 Subject: [PATCH 023/693] Repackage vLLM nightlies (#162371) I suspected that I would need to repack vLLM wheels from https://github.com/pytorch/pytorch/pull/162000 because I renamed the wheel, and it turns out to be true. The error is as follows: ``` $ uv pip install --pre xformers --index-url https://download.pytorch.org/whl/nightly/cu129 Using Python 3.12.11+meta environment at: venv/py3.12 Resolved 28 packages in 759ms error: Failed to install: xformers-0.0.33.dev20250901+cu129-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (xformers==0.0.33.dev20250901+cu129) Caused by: Wheel version does not match filename: 0.0.33+5d4b92a5.d20250907 != 0.0.33.dev20250901+cu129 ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162371 Approved by: https://github.com/atalman --- .github/scripts/prepare_vllm_wheels.sh | 91 ++++++++++++++++++++++++++ .github/workflows/build-vllm-wheel.yml | 61 +++++------------ 2 files changed, 107 insertions(+), 45 deletions(-) create mode 100755 .github/scripts/prepare_vllm_wheels.sh diff --git a/.github/scripts/prepare_vllm_wheels.sh b/.github/scripts/prepare_vllm_wheels.sh new file mode 100755 index 0000000000000..a1cd387ef4a6f --- /dev/null +++ b/.github/scripts/prepare_vllm_wheels.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash + +set -eux + +torch_version=$(unzip -p torch-* '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) +nightly=$(echo ${torch_version} | cut -d'.' 
-f4) + +# Copied from .ci/manywheel/build_common.sh +make_wheel_record() { + fpath=$1 + if echo $fpath | grep RECORD >/dev/null 2>&1; then + echo "$fpath,," + else + fhash=$(openssl dgst -sha256 -binary $fpath | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g') + fsize=$(ls -nl $fpath | awk '{print $5}') + echo "$fpath,sha256=$fhash,$fsize" + fi +} + +change_wheel_version() { + local package=$1 + local wheel=$2 + local f_version=$3 + local t_version=$4 + + # Extract the wheel + ${PYTHON_EXECUTABLE} -mwheel unpack $wheel + + mv "${package}-${f_version}" "${package}-${t_version}" + # Change the version from f_version to t_version in the dist-info dir + pushd "${package}-${t_version}" + mv "${package}-${f_version}.dist-info" "${package}-${t_version}.dist-info" + + pushd "${package}-${t_version}.dist-info" + sed -i "s/${package}-${f_version}.dist-info/${package}-${t_version}.dist-info/g" RECORD + + # Update the version in METADATA and its SHA256 hash + sed -i "s/Version: ${f_version}/Version: ${t_version}/g" METADATA + # then add PyTorch nightly dependency of vLLM + if [[ "${package}" == vllm ]] || [[ "${package}" == xformers ]]; then + sed -i "/License-File/a\Requires-Dist: torch==${torch_version}" METADATA + fi + sed -i '/METADATA,sha256/d' RECORD + popd + + make_wheel_record "${package}-${t_version}.dist-info/METADATA" >> "${package}-${t_version}.dist-info/RECORD" + popd + + # Repack the wheel + ${PYTHON_EXECUTABLE} -mwheel pack "${package}-${t_version}" + + # Clean up + rm -rf "${package}-${t_version}" +} + +repackage_wheel() { + local package=$1 + pushd $package + + local orig_wheel=$(find . -name *${package//-/_}*) + local orig_version=$(unzip -p $orig_wheel '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) + + local version="" + if [[ "${package}" == vllm ]]; then + # Copied from vllm/.buildkite/scripts/upload-wheels.sh + version=1.0.0 + else + version=$(echo $orig_version | tr '.+' '.' | cut -d'.' -f1-3) + fi + local nightly_version=$version.$nightly + + # Use nightly version + change_wheel_version ${package//-/_} $orig_wheel $orig_version $nightly_version + # Clean up + rm "${orig_wheel}" + + auditwheel repair --plat $PLATFORM *.whl \ + --exclude libc10* --exclude libtorch* --exclude libcu* --exclude libnv* + local repair_wheel=$(find wheelhouse -name *${PLATFORM}*) + local repair_wheel=$(basename ${repair_wheel}) + popd + + cp ${package}/wheelhouse/${repair_wheel} . 
+ rm -rf $package +} + +pushd externals/vllm/wheels +for package in xformers flashinfer-python vllm; do + repackage_wheel $package +done +popd diff --git a/.github/workflows/build-vllm-wheel.yml b/.github/workflows/build-vllm-wheel.yml index 658e02ede6fbd..1c3b1cce46038 100644 --- a/.github/workflows/build-vllm-wheel.yml +++ b/.github/workflows/build-vllm-wheel.yml @@ -59,20 +59,6 @@ jobs: run: | set -eux - # Keep PyTorch nightly wheel here so that we can install it later during - # vLLM build process - mkdir -p "${RUNNER_TEMP}/artifacts/" - - container_name=$(docker run \ - --tty \ - --detach \ - -e PLATFORM \ - -v "${GITHUB_WORKSPACE}:/pytorch" \ - -v "${RUNNER_TEMP}/artifacts:/artifacts" \ - -w /artifacts/ \ - "${MANYLINUX_IMAGE}" - ) - # Determine python executable for given version (copied from build-triton-wheel) case $PY_VERS in 3.10) @@ -102,6 +88,21 @@ jobs: ;; esac + # Keep PyTorch nightly wheel here so that we can install it later during + # vLLM build process + mkdir -p "${RUNNER_TEMP}/artifacts/" + + container_name=$(docker run \ + --tty \ + --detach \ + -e PLATFORM \ + -e PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \ + -v "${GITHUB_WORKSPACE}:/pytorch" \ + -v "${RUNNER_TEMP}/artifacts:/artifacts" \ + -w /artifacts/ \ + "${MANYLINUX_IMAGE}" + ) + docker exec -t "${container_name}" "${PYTHON_EXECUTABLE}" -mpip install \ --pre torch torchvision torchaudio \ --index-url "https://download.pytorch.org/whl/nightly/${BUILD_DEVICE}" @@ -113,7 +114,6 @@ jobs: --index-url "https://download.pytorch.org/whl/nightly/${BUILD_DEVICE}" # Save this for later - echo "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}" >> "$GITHUB_ENV" echo "container_name=${container_name}" >> "$GITHUB_ENV" - name: Build vLLM wheel @@ -131,36 +131,7 @@ jobs: set -eux # Get these wheels ready, the vllm renaming logic is copied from its .buildkite/scripts/upload-wheels.sh - docker exec -t "${container_name}" bash -c " - set -eux - - nightly=\$(unzip -p torch-* '**/METADATA' | grep '^Version: ' | cut -d' ' -f2 | cut -d'.' -f4) - - pushd externals/vllm/wheels - for package in xformers flashinfer-python vllm; do - pushd \$package - auditwheel repair --plat \$PLATFORM *.whl \ - --exclude libc10* --exclude libtorch* --exclude libcu* --exclude libnv* - repair_wheel=\$(find wheelhouse -name *\${PLATFORM}*) - repair_wheel=\$(basename \${repair_wheel}) - popd - - cp \${package}/wheelhouse/\${repair_wheel} . - version=\$(unzip -p \$repair_wheel '**/METADATA' | grep '^Version: ' | cut -d' ' -f2) - - if [[ \$package == vllm ]]; then - new_wheel=\${repair_wheel/\$version/1.0.0.\$nightly} - else - major_version=\$(echo \$version | tr '.+' '.' | cut -d'.' 
-f1-3) - new_wheel=\${repair_wheel/\$version/\$major_version.\$nightly} - fi - - mv -- \$repair_wheel \$new_wheel - rm -rf \$package - done - popd - " - + docker exec -t "${container_name}" bash -c /pytorch/.github/scripts/prepare_vllm_wheels.sh docker exec -t "${container_name}" chown -R 1000:1000 /artifacts - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 From 5f40a8a9a3977d6af57caafa5e94f7766489f534 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 10 Sep 2025 04:21:38 +0000 Subject: [PATCH 024/693] [BE] Fix `'_WIN32' is not defined` warning (#162516) Summary: As indeed it is not defined neither on Linux nor on MacOS platforms Test Plan: CI Rollback Plan: Differential Revision: D82044853 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162516 Approved by: https://github.com/Skylion007 --- c10/cuda/CUDAFunctions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c10/cuda/CUDAFunctions.cpp b/c10/cuda/CUDAFunctions.cpp index 9839e4e72049e..422652bb021b1 100644 --- a/c10/cuda/CUDAFunctions.cpp +++ b/c10/cuda/CUDAFunctions.cpp @@ -78,7 +78,7 @@ int device_count_impl(bool fail_if_no_driver) { "would like to use GPUs, turn off ASAN."); break; #endif // C10_ASAN_ENABLED -#if _WIN32 && CUDA_VERSION >= 13000 +#if defined(_WIN32) && CUDA_VERSION >= 13000 // Workaround for CUDA-13.0 error handling on Windows, see // https://github.com/pytorch/pytorch/issues/162333#issuecomment-3267929585 case cudaErrorNotSupported: From 11acfed3ced1d4865de47a25e4577229be4d622b Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Wed, 10 Sep 2025 04:24:36 +0000 Subject: [PATCH 025/693] [audio hash update] update the pinned audio hash (#162552) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned audio hash. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162552 Approved by: https://github.com/pytorchbot --- .github/ci_commit_pins/audio.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ci_commit_pins/audio.txt b/.github/ci_commit_pins/audio.txt index b0255e764c594..55fc09b9c034f 100644 --- a/.github/ci_commit_pins/audio.txt +++ b/.github/ci_commit_pins/audio.txt @@ -1 +1 @@ -27fc2493d383354a008106f22f3be232badee9a1 +fa5142928ee157aa65137c4ecff2fe9b1a9e0648 From dda071587f0522a16b237f92cbe27fd13a1a1c11 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Wed, 10 Sep 2025 00:24:16 -0400 Subject: [PATCH 026/693] Revert "Make distributed modules importable even when backend not built (#159889)" (#162568) This reverts commit a0d026688cd69583d5a4e0c6f3e5fda141a7f4a9. Revert "Always build USE_DISTRIBUTED. (#160449)" This reverts commit d80297a6846f1f2c36fd4f19e22919f2abe8fcea. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162568 Approved by: https://github.com/huydhn --- .ci/pytorch/macos-build.sh | 7 +- .ci/pytorch/macos-test.sh | 4 - .ci/wheel/build_wheel.sh | 3 +- BUILD.bazel | 1 + CMakeLists.txt | 12 +- buckbuild.bzl | 2 - caffe2/CMakeLists.txt | 144 +++++----- cmake/Dependencies.cmake | 2 +- cmake/Summary.cmake | 12 +- docs/source/conf.py | 7 + test/cpp/dist_autograd/CMakeLists.txt | 2 +- test/distributed/tensor/test_fake.py | 41 --- test/export/test_export.py | 10 +- test/test_numa_binding.py | 5 +- tools/build_pytorch_libs.py | 3 +- torch/CMakeLists.txt | 50 ++-- torch/_C/_distributed_c10d.pyi | 9 - torch/csrc/Exceptions.h | 2 + torch/csrc/Module.cpp | 8 +- torch/csrc/autograd/functions/init.cpp | 4 + torch/csrc/distributed/c10d/HashStore.cpp | 1 + torch/csrc/distributed/c10d/Work.cpp | 2 +- torch/csrc/inductor/aoti_torch/shim_cpu.cpp | 4 + torch/csrc/jit/python/pybind_utils.h | 6 +- .../csrc/jit/python/python_sugared_value.cpp | 3 +- torch/csrc/jit/runtime/interpreter.h | 14 +- torch/csrc/jit/serialization/pickler.h | 2 + torch/csrc/jit/serialization/unpickler.h | 2 + .../standalone/execution_trace_observer.cpp | 9 + torch/csrc/profiler/util.cpp | 6 +- torch/csrc/profiler/util.h | 2 + torch/distributed/_C_stubs.py | 150 ---------- torch/distributed/__init__.py | 258 +++++++++--------- torch/distributed/_dist2.py | 2 +- torch/distributed/_distributed_c10d.py | 245 ----------------- torch/distributed/_functional_collectives.py | 12 +- .../_shard/sharded_tensor/reshard.py | 2 +- .../chunk_sharding_spec_ops/embedding_bag.py | 2 +- .../distributed/_symmetric_memory/__init__.py | 22 +- .../_symmetric_memory/_nvshmem_triton.py | 2 +- torch/distributed/_tools/fake_collectives.py | 4 +- .../algorithms/model_averaging/utils.py | 4 + torch/distributed/constants.py | 15 +- torch/distributed/device_mesh.py | 44 ++- torch/distributed/distributed_c10d.py | 70 ++--- torch/distributed/elastic/control_plane.py | 2 +- torch/distributed/nn/functional.py | 4 + torch/distributed/rpc/__init__.py | 2 +- torch/distributed/tensor/_collective_utils.py | 4 +- .../testing/_internal/distributed/fake_pg.py | 2 +- 50 files changed, 451 insertions(+), 774 deletions(-) delete mode 100644 test/distributed/tensor/test_fake.py delete mode 100644 torch/distributed/_C_stubs.py delete mode 100644 torch/distributed/_distributed_c10d.py diff --git a/.ci/pytorch/macos-build.sh b/.ci/pytorch/macos-build.sh index d41c3c08e6288..d7447e7d48582 100755 --- a/.ci/pytorch/macos-build.sh +++ b/.ci/pytorch/macos-build.sh @@ -35,10 +35,11 @@ fi print_cmake_info if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then - USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel + # Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls + USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel else - # NB: we always build with distributed; USE_DISTRIBUTED turns off all - # backends (specifically the gloo backend), so test that this case works too + # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests + # that building with USE_DISTRIBUTED=0 works at all. 
See https://github.com/pytorch/pytorch/issues/86448 USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64 fi if which sccache > /dev/null; then diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index 42a536a555bd6..53f5b46714639 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -13,13 +13,9 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available( fi popd -python -mpip install -r requirements.txt - # enable debug asserts in serialization export TORCH_SERIALIZATION_DEBUG=1 -python -mpip install --no-input -r requirements.txt - setup_test_python() { # The CircleCI worker hostname doesn't resolve to an address. # This environment variable makes ProcessGroupGloo default to diff --git a/.ci/wheel/build_wheel.sh b/.ci/wheel/build_wheel.sh index 763fce4b73e18..e63a68e4f1934 100755 --- a/.ci/wheel/build_wheel.sh +++ b/.ci/wheel/build_wheel.sh @@ -189,8 +189,7 @@ pip install requests ninja typing-extensions retry pip install -r "${pytorch_rootdir}/requirements.txt" || true retry brew install libomp -# For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which -# is build as part of tensorpipe submodule +# For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule export USE_DISTRIBUTED=1 export USE_MKLDNN=OFF diff --git a/BUILD.bazel b/BUILD.bazel index 2cbd36f06761b..d4202e7a2c1e4 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -22,6 +22,7 @@ COMMON_COPTS = [ "-DHAVE_SHM_UNLINK=1", "-D_FILE_OFFSET_BITS=64", "-DUSE_FBGEMM", + "-DUSE_DISTRIBUTED", "-DAT_PER_OPERATOR_HEADERS", "-DATEN_THREADING=NATIVE", "-DNO_CUDNN_DESTROY_HANDLE", diff --git a/CMakeLists.txt b/CMakeLists.txt index d89f5f6709d12..dc5405ecef235 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,9 +181,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le)") set(CPU_POWER ON) endif() -# For non-supported platforms, turn USE_DISTRIBUTED off by default. -# NB: USE_DISTRIBUTED simply disables the backend; distributed code -# still gets built +# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not +# tested and likely won't work without additional changes. if(NOT LINUX AND NOT WIN32) set(USE_DISTRIBUTED OFF @@ -263,11 +262,11 @@ option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF) option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF) option(USE_NATIVE_ARCH "Use -march=native" OFF) cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF) -option(USE_DISTRIBUTED "Enable default distributed backends" ON) +option(USE_DISTRIBUTED "Use distributed" ON) cmake_dependent_option(USE_NCCL "Use NCCL" ON "USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) cmake_dependent_option(USE_XCCL "Use XCCL" ON - "USE_DISTRIBUTED;USE_XPU;UNIX;NOT APPLE" OFF) + "USE_XPU;UNIX;NOT APPLE" OFF) cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF) cmake_dependent_option(USE_RCCL "Use RCCL" ON "USE_NCCL;NOT WIN32" OFF) cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF) @@ -432,10 +431,11 @@ if(WIN32) PATH_SUFFIXES lib NO_DEFAULT_PATH) if(NOT libuv_tmp_LIBRARY) + set(USE_DISTRIBUTED OFF) set(USE_GLOO OFF) message( WARNING - "Libuv is not installed in current conda env. Set USE_GLOO to OFF. " + "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. 
" "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv." ) else() diff --git a/buckbuild.bzl b/buckbuild.bzl index 218fd747301f9..e079d98395441 100644 --- a/buckbuild.bzl +++ b/buckbuild.bzl @@ -948,7 +948,6 @@ def define_buck_targets( [ ("torch/csrc/api/include", "torch/**/*.h"), ("", "torch/csrc/**/*.h"), - ("", "torch/csrc/**/*.hpp"), ("", "torch/nativert/**/*.h"), ("", "torch/headeronly/**/*.h"), ("", "torch/script.h"), @@ -2034,7 +2033,6 @@ def define_buck_targets( ("", "caffe2/utils/*.h"), ("", "caffe2/core/*.h"), ("", "torch/csrc/*.h"), - ("", "torch/csrc/*.hpp"), ("", "torch/csrc/api/include/torch/*.h"), ("", "torch/csrc/autograd/*.h"), ("", "torch/csrc/autograd/*/*.h"), diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 4cd773bc16123..4623fec08fe32 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -540,9 +540,11 @@ if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER) ${TORCH_SRC_DIR}/csrc/utils/byte_order.cpp ) - append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) - if(NOT WIN32) - append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) + if(USE_DISTRIBUTED) + append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) + if(NOT WIN32) + append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) + endif() endif() endif() @@ -566,30 +568,32 @@ if(USE_CUDA) list(APPEND Caffe2_GPU_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) endif() - append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS) - if(NOT WIN32) - append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) - set_source_files_properties( - ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/utils.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp - PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" - ) - endif() + if(USE_DISTRIBUTED) + append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS) + if(NOT WIN32) + append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) + set_source_files_properties( + ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/utils.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp + PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" + ) + endif() - set(ASYNC_MM_FILE "${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/AsyncMM.cu") - # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9 - if(CMAKE_COMPILER_IS_GNUCXX) - set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS 
"-Wno-unused-but-set-variable") - endif() - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*") - set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a") + set(ASYNC_MM_FILE "${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/AsyncMM.cu") + # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9 + if(CMAKE_COMPILER_IS_GNUCXX) + set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable") + endif() + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*") + set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a") + endif() endif() set_source_files_properties( ${TORCH_ROOT}/aten/src/ATen/cuda/detail/LazyNVRTC.cpp @@ -622,9 +626,11 @@ if(USE_ROCM) list(APPEND Caffe2_HIP_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) endif() - append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) - if(NOT WIN32) - append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) + if(USE_DISTRIBUTED) + append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) + if(NOT WIN32) + append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) + endif() endif() # caffe2_nvrtc's stubs to driver APIs are useful for HIP. # See NOTE [ ATen NVRTC Stub and HIP ] @@ -1345,10 +1351,12 @@ if(BUILD_TEST) add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit) add_subdirectory(${TORCH_ROOT}/test/cpp/nativert ${CMAKE_BINARY_DIR}/test_nativert) add_subdirectory(${TORCH_ROOT}/test/inductor ${CMAKE_BINARY_DIR}/test_inductor) - add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) - if(NOT WIN32) - add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) - add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) + if(USE_DISTRIBUTED) + add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) + if(NOT WIN32) + add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) + add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) + endif() endif() if(NOT NO_API) add_subdirectory(${TORCH_ROOT}/test/cpp/api ${CMAKE_BINARY_DIR}/test_api) @@ -1453,40 +1461,46 @@ if(BUILD_LITE_INTERPRETER) endif() endif() -if(USE_GLOO AND USE_C10D_GLOO) - target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) -endif() -if(USE_UCC AND USE_C10D_UCC) - target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) - if(USE_CUDA) - target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) + +# Pass USE_DISTRIBUTED to torch_cpu, as some codes in jit/pickler.cpp and +# jit/unpickler.cpp need to be compiled only when USE_DISTRIBUTED is set +if(USE_DISTRIBUTED) + target_compile_definitions(torch_cpu PUBLIC USE_DISTRIBUTED) + if(USE_GLOO AND USE_C10D_GLOO) + target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) endif() -endif() -if(USE_NCCL AND USE_C10D_NCCL) - if(USE_ROCM) - target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) - else() - target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) + if(USE_UCC AND USE_C10D_UCC) + target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) + if(USE_CUDA) + target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) + endif() + endif() + if(USE_NCCL AND USE_C10D_NCCL) + if(USE_ROCM) + 
target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) + else() + target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) + endif() + endif() + if(USE_MPI AND USE_C10D_MPI) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set_source_files_properties( + "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" + PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) + endif() + target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) + endif() + # Pass USE_RPC in order to reduce use of + # #if defined(USE_DISTRIBUTED) && !defined(_WIN32) + # need to be removed when RPC is supported + if(NOT WIN32) + target_compile_definitions(torch_cpu PUBLIC USE_RPC) + endif() + # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp + # can only be compiled with USE_TENSORPIPE is set. + if(USE_TENSORPIPE) + target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) endif() -endif() -if(USE_MPI AND USE_C10D_MPI) - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set_source_files_properties( - "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" - PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) - endif() - target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) -endif() -# Pass USE_RPC in order to reduce use of -# #if defined(USE_DISTRIBUTED) && !defined(_WIN32) -# need to be removed when RPC is supported -if(NOT WIN32) - target_compile_definitions(torch_cpu PUBLIC USE_RPC) -endif() -# Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp -# can only be compiled with USE_TENSORPIPE is set. -if(USE_TENSORPIPE) - target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) endif() if(NOT INTERN_BUILD_MOBILE) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 08ffdaf8cf451..6ad56d3b9b44e 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1134,7 +1134,7 @@ if(USE_CUDA AND CUDA_VERSION VERSION_LESS 13.0) include_directories(SYSTEM ${CUB_INCLUDE_DIRS}) endif() -if(USE_TENSORPIPE) +if(USE_DISTRIBUTED AND USE_TENSORPIPE) if(MSVC) message(WARNING "Tensorpipe cannot be used on Windows.") else() diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index fb64e99bccf22..ffd4b5298a890 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -192,11 +192,13 @@ function(caffe2_print_configuration_summary) message(STATUS " USE_PYTORCH_QNNPACK : ${USE_PYTORCH_QNNPACK}") message(STATUS " USE_XNNPACK : ${USE_XNNPACK}") message(STATUS " USE_DISTRIBUTED : ${USE_DISTRIBUTED}") - message(STATUS " USE_MPI : ${USE_MPI}") - message(STATUS " USE_GLOO : ${USE_GLOO}") - message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}") - message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}") - message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}") + if(${USE_DISTRIBUTED}) + message(STATUS " USE_MPI : ${USE_MPI}") + message(STATUS " USE_GLOO : ${USE_GLOO}") + message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}") + message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}") + message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}") + endif() if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") endif() diff --git a/docs/source/conf.py b/docs/source/conf.py index d1504757f9c54..44ad4de8115f6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -3333,6 +3333,13 @@ def coverage_post_process(app, exception): if not isinstance(app.builder, CoverageBuilder): return + if not torch.distributed.is_available(): + 
raise RuntimeError( + "The coverage tool cannot run with a version " + "of PyTorch that was built with USE_DISTRIBUTED=0 " + "as this module's API changes." + ) + # These are all the modules that have "automodule" in an rst file # These modules are the ones for which coverage is checked # Here, we make sure that no module is missing from that list diff --git a/test/cpp/dist_autograd/CMakeLists.txt b/test/cpp/dist_autograd/CMakeLists.txt index 86a6c924288bb..14fd7f7ae9a2b 100644 --- a/test/cpp/dist_autograd/CMakeLists.txt +++ b/test/cpp/dist_autograd/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT WIN32) +if(USE_DISTRIBUTED AND NOT WIN32) set(DIST_AUTOGRAD_TEST_DIR "${TORCH_ROOT}/test/cpp/dist_autograd") set(DIST_AUTOGRAD_TEST_SOURCES ${TORCH_ROOT}/test/cpp/common/main.cpp diff --git a/test/distributed/tensor/test_fake.py b/test/distributed/tensor/test_fake.py deleted file mode 100644 index 099c6e87f5f18..0000000000000 --- a/test/distributed/tensor/test_fake.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates -# Owner(s): ["oncall: distributed"] - -import torch -from torch._subclasses.fake_tensor import FakeTensorMode -from torch.distributed.tensor import DTensor -from torch.distributed.tensor.placement_types import Shard -from torch.testing._internal.common_utils import run_tests, TestCase -from torch.testing._internal.distributed.fake_pg import FakeStore - - -class TestFakeDTensor(TestCase): - def test_fake_dtensor_operations(self): - # Use FakeTensorMode to handle CUDA tensors without actual CUDA - fake_mode = FakeTensorMode() - world_size = 4 - - fake_store = FakeStore() - torch.distributed.init_process_group( - "fake", store=fake_store, rank=0, world_size=world_size - ) - device_mesh = torch.distributed.device_mesh.init_device_mesh( - "cuda", - (2, world_size // 2), - ) - - # Create fake CUDA tensor using FakeTensorMode - with fake_mode: - x = torch.randn(1, 1, device="cuda") - x = DTensor.from_local(x, device_mesh, [Shard(0), Shard(1)]) - - # Test basic DTensor operations - self.assertIsInstance(x, DTensor) - - # Test sum operation - r = x.sum(1) - self.assertIsInstance(r, DTensor) - - -if __name__ == "__main__": - run_tests() diff --git a/test/export/test_export.py b/test/export/test_export.py index feb85e59556ef..4b3f97345d06e 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -65,7 +65,10 @@ from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.symbolic_shapes import ShapeEnv from torch.testing import FileCheck -from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FLASH_ATTENTION +from torch.testing._internal.common_cuda import ( + PLATFORM_SUPPORTS_FLASH_ATTENTION, + xfailIfDistributedNotSupported, +) from torch.testing._internal.common_utils import ( find_library_location, IS_FBCODE, @@ -15655,6 +15658,7 @@ def distributed_env(self, world_size): finally: torch.distributed.destroy_process_group() + @xfailIfDistributedNotSupported def test_distributed_all_reduce(self): class Foo(torch.nn.Module): def __init__(self): @@ -15672,6 +15676,7 @@ def forward(self, x): inp = (torch.randn(4, 4),) self.assertTrue(torch.allclose(ep.module()(*inp), m(*inp))) + @xfailIfDistributedNotSupported def test_distributed_all_gather(self): class Foo(torch.nn.Module): def forward(self, x): @@ -15687,6 +15692,7 @@ def forward(self, x): torch.allclose(a, b) for a, b in zip(ep.module()(*inp), m(*inp)) ) + @xfailIfDistributedNotSupported def test_distributed_all_gather_into_tensor(self): class Foo(torch.nn.Module): 
def forward(self, x): @@ -15700,6 +15706,7 @@ def forward(self, x): inp = (torch.randn(2),) self.assertTrue(torch.allclose(ep.module()(*inp), m(*inp))) + @xfailIfDistributedNotSupported @testing.expectedFailureCppRuntime def test_distributed_all_to_all_single(self): class Foo(torch.nn.Module): @@ -15717,6 +15724,7 @@ def forward(self, x): ) self.assertEqual(len(nodes), 1) + @xfailIfDistributedNotSupported @testing.expectedFailureCppRuntime def test_distributed_reduce_scatter_tensor(self): class Foo(torch.nn.Module): diff --git a/test/test_numa_binding.py b/test/test_numa_binding.py index d38032ba22603..764156ff9b98a 100644 --- a/test/test_numa_binding.py +++ b/test/test_numa_binding.py @@ -7,7 +7,7 @@ from dataclasses import dataclass from multiprocessing.context import SpawnProcess from typing import Any, Optional -from unittest import skipIf, skipUnless +from unittest import skipUnless from unittest.mock import mock_open, patch import torch @@ -22,7 +22,7 @@ AffinityMode, NumaOptions, ) -from torch.testing._internal.common_utils import IS_MACOS, run_tests, TestCase +from torch.testing._internal.common_utils import run_tests, TestCase @dataclass(frozen=True) @@ -680,7 +680,6 @@ def test_core_complex_tiebreak_prefers_lower_cache_key(self) -> None: set(range(0, 2)), ) - @skipIf(IS_MACOS, "sched_getaffinity doesn't exist") def test_binds_to_node_0_if_node_stored_as_minus_one(self) -> None: self._add_mock_hardware( num_sockets=1, diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py index 457b224354fb2..9d43de80f1298 100644 --- a/tools/build_pytorch_libs.py +++ b/tools/build_pytorch_libs.py @@ -88,7 +88,8 @@ def build_pytorch( ) -> None: my_env = _create_build_env() if ( - not check_negative_env_flag("USE_CUDA") + not check_negative_env_flag("USE_DISTRIBUTED") + and not check_negative_env_flag("USE_CUDA") and not check_negative_env_flag("USE_NCCL") and not check_env_flag("USE_SYSTEM_NCCL") ): diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index adc9aad4a05c3..866c40ad1c12e 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -276,30 +276,32 @@ add_custom_command( WORKING_DIRECTORY "${TORCH_ROOT}" ) +if(USE_DISTRIBUTED) + if(WIN32) + append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS) + else() + append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS) + endif() + # Disable certain warnings for GCC-9.X + if(CMAKE_COMPILER_IS_GNUCXX) + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + endif() + # NCCL is a private dependency of libtorch, but libtorch_python includes + # some private headers of libtorch, which in turn include NCCL. As a hacky + # alternative to making NCCL a public dependency of libtorch, we make it + # a private dependency of libtorch_python as well. + if(USE_NCCL) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl) + endif() + # Same for MPI. 
+ if(USE_MPI) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX) + endif() + list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D) -if(WIN32) - append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS) -else() - append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS) endif() -# Disable certain warnings for GCC-9.X -if(CMAKE_COMPILER_IS_GNUCXX) - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") -endif() -# NCCL is a private dependency of libtorch, but libtorch_python includes -# some private headers of libtorch, which in turn include NCCL. As a hacky -# alternative to making NCCL a public dependency of libtorch, we make it -# a private dependency of libtorch_python as well. -if(USE_NCCL) - list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl) -endif() -# Same for MPI. -if(USE_MPI) - list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX) -endif() -list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D) if(USE_NCCL AND NOT WIN32) list(APPEND TORCH_PYTHON_SRCS @@ -367,6 +369,10 @@ if(BUILD_LIBTORCHLESS) target_compile_definitions(torch_python PRIVATE USE_C10D_NCCL) endif() + if(USE_DISTRIBUTED) + target_compile_definitions(torch_python PRIVATE USE_DISTRIBUTED) + endif() + if(USE_MPI AND USE_C10D_MPI) target_compile_definitions(torch_python PRIVATE USE_C10D_MPI) endif() diff --git a/torch/_C/_distributed_c10d.pyi b/torch/_C/_distributed_c10d.pyi index 79e437063b8cb..ad3d8e3abf245 100644 --- a/torch/_C/_distributed_c10d.pyi +++ b/torch/_C/_distributed_c10d.pyi @@ -851,12 +851,3 @@ class ProcessGroupXCCL(Backend): def _set_process_group(pg: ProcessGroup) -> None: ... def _current_process_group() -> ProcessGroup: ... -def _dump_nccl_trace_json( - includeCollectives: Optional[bool] = ..., - onlyActive: Optional[bool] = ..., -) -> bytes: ... -def _dump_nccl_trace( - includeCollectives: Optional[bool] = ..., - includeStackTraces: Optional[bool] = ..., - onlyActive: Optional[bool] = ..., -) -> bytes: ... 
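The remaining hunks of this revert reinstate compile-time guards across torch/csrc so that distributed-only headers and symbols are compiled out whenever the build does not define USE_DISTRIBUTED. A minimal sketch of that pattern, for orientation only (it is not part of the patch; the guarded include path is just an example, and the runtime check mirrors THPModule_hasDistributed in the Module.cpp hunk below):

#ifdef USE_DISTRIBUTED
// Distributed-only header: only reachable when the backend sources are built.
#include <torch/csrc/distributed/c10d/Work.hpp>
#endif

// Callers keep both branches buildable and report availability at runtime.
bool has_distributed() {
#ifdef USE_DISTRIBUTED
  return true;   // distributed sources were compiled in
#else
  return false;  // distributed sources were skipped entirely
#endif
}
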
diff --git a/torch/csrc/Exceptions.h b/torch/csrc/Exceptions.h index d43d2b02a23ef..60a7bb644df01 100644 --- a/torch/csrc/Exceptions.h +++ b/torch/csrc/Exceptions.h @@ -15,7 +15,9 @@ #include #include +#if defined(USE_DISTRIBUTED) #include +#endif inline void PyErr_SetString(PyObject* type, const std::string& message) { PyErr_SetString(type, message.c_str()); diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index 6f052b0331edc..675a4c4310052 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -120,12 +120,14 @@ #endif #endif +#ifdef USE_DISTRIBUTED #ifdef USE_C10D #include #include #include #include #endif +#endif #if defined(USE_VALGRIND) #include @@ -550,7 +552,11 @@ static PyObject* THPModule_getBackcompatKeepdimWarn( } static PyObject* THPModule_hasDistributed(PyObject* _unused, PyObject* noargs) { +#ifdef USE_DISTRIBUTED Py_RETURN_TRUE; +#else + Py_RETURN_FALSE; +#endif } static PyObject* THPModule_showConfig(PyObject* module, PyObject* noargs) { @@ -1987,7 +1993,7 @@ PyObject* initModule() { #ifdef USE_XPU THPUtils_addPyMethodDefs(methods, THXPModule_methods()); #endif -#ifdef USE_C10D +#if defined(USE_DISTRIBUTED) && defined(USE_C10D) THPUtils_addPyMethodDefs( methods, torch::distributed::c10d::python_functions()); #ifndef _WIN32 diff --git a/torch/csrc/autograd/functions/init.cpp b/torch/csrc/autograd/functions/init.cpp index 05c8901e1f60d..5e19010f9ae3c 100644 --- a/torch/csrc/autograd/functions/init.cpp +++ b/torch/csrc/autograd/functions/init.cpp @@ -8,7 +8,9 @@ #include #include #include +#ifdef USE_DISTRIBUTED #include +#endif #include #include #include @@ -148,9 +150,11 @@ void THPAutograd_initFunctions() { static PyTypeObject CopyBackwardsClass; addClass(module, CopyBackwardsClass, "CopyBackwards"); +#ifdef USE_DISTRIBUTED static PyTypeObject SendRpcBackwardClass; addClass( module, SendRpcBackwardClass, "SendRpcBackward"); +#endif static PyTypeObject CopySlicesClass; addClass(module, CopySlicesClass, "CopySlices"); diff --git a/torch/csrc/distributed/c10d/HashStore.cpp b/torch/csrc/distributed/c10d/HashStore.cpp index 1055afc4847d0..15befd9ec34e2 100644 --- a/torch/csrc/distributed/c10d/HashStore.cpp +++ b/torch/csrc/distributed/c10d/HashStore.cpp @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/torch/csrc/distributed/c10d/Work.cpp b/torch/csrc/distributed/c10d/Work.cpp index 2c1ee42727d8a..cdec9185ce537 100644 --- a/torch/csrc/distributed/c10d/Work.cpp +++ b/torch/csrc/distributed/c10d/Work.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/torch/csrc/inductor/aoti_torch/shim_cpu.cpp b/torch/csrc/inductor/aoti_torch/shim_cpu.cpp index a610685fe9557..b1c864bf3fbba 100644 --- a/torch/csrc/inductor/aoti_torch/shim_cpu.cpp +++ b/torch/csrc/inductor/aoti_torch/shim_cpu.cpp @@ -1,5 +1,7 @@ +#ifdef USE_DISTRIBUTED #include +#endif #include #include @@ -531,6 +533,7 @@ AOTITorchError aoti_torch_cpu__weight_int4pack_mm_cpu_tensor( }); } +#ifdef USE_DISTRIBUTED AOTITorchError aoti_torch_cpu__c10d_functional_all_reduce_( AtenTensorHandle inp, const char* reduce_op, @@ -563,3 +566,4 @@ AOTITorchError aoti_torch_cpu__c10d_functional_wait_tensor( *ret0 = new_tensor_handle(std::move(tmp_result)); }); } +#endif diff --git a/torch/csrc/jit/python/pybind_utils.h b/torch/csrc/jit/python/pybind_utils.h index 2c0c1ea4b9cf2..5ae84e3e0c68b 100644 --- a/torch/csrc/jit/python/pybind_utils.h +++ b/torch/csrc/jit/python/pybind_utils.h @@ -13,8 +13,6 @@ #include #include #include -#include -#include #include #include #include @@ 
-26,6 +24,10 @@ #include #include #include +#ifdef USE_DISTRIBUTED +#include +#include +#endif #include #include diff --git a/torch/csrc/jit/python/python_sugared_value.cpp b/torch/csrc/jit/python/python_sugared_value.cpp index 808fe7d3605ba..8b16e089aa50e 100644 --- a/torch/csrc/jit/python/python_sugared_value.cpp +++ b/torch/csrc/jit/python/python_sugared_value.cpp @@ -1225,7 +1225,7 @@ std::shared_ptr toSugaredValue( } else if (obj.ptr() == py::module::import("torch").attr("_check").ptr()) { return std::make_shared(); #ifdef USE_RPC - // This is not defined on WINDOWS + // RPC module is only available when build flag "USE_DISTRIBUTED" is on. } else if ( isRpcAvailable && obj.ptr() == @@ -1238,6 +1238,7 @@ std::shared_ptr toSugaredValue( return SpecialFormValue::create(prim::rpc_sync); } else if ( isRpcAvailable && + // RPC module is only available when build flag "USE_DISTRIBUTED" is on. obj.ptr() == py::module::import("torch.distributed.rpc").attr("remote").ptr()) { return SpecialFormValue::create(prim::rpc_remote); diff --git a/torch/csrc/jit/runtime/interpreter.h b/torch/csrc/jit/runtime/interpreter.h index be582cfb7cdd8..6ae9f52a0cda2 100644 --- a/torch/csrc/jit/runtime/interpreter.h +++ b/torch/csrc/jit/runtime/interpreter.h @@ -128,8 +128,13 @@ struct InterpreterContinuation { std::optional tls_state = std::nullopt) : state(std::move(state_)), stack(std::move(stack_)), - tls_state_(std::move(tls_state)), - dist_autograd_context_id_(dist_autograd_context_id) {} + tls_state_(std::move(tls_state)) +#ifdef USE_DISTRIBUTED + , + dist_autograd_context_id_(dist_autograd_context_id) +#endif + { + } void operator()(); @@ -137,10 +142,9 @@ struct InterpreterContinuation { InterpreterState state; Stack stack; std::optional tls_state_ = std::nullopt; -#ifndef USE_RPC - [[maybe_unused]] -#endif +#ifdef USE_DISTRIBUTED int64_t dist_autograd_context_id_; +#endif }; // what is the tensors type, including state from the current execution context diff --git a/torch/csrc/jit/serialization/pickler.h b/torch/csrc/jit/serialization/pickler.h index e3379f4de65ac..526c840bc10e8 100644 --- a/torch/csrc/jit/serialization/pickler.h +++ b/torch/csrc/jit/serialization/pickler.h @@ -79,7 +79,9 @@ class TORCH_API Pickler { void pushTuple(const IValue& ivalue); void pushString(const std::string& string); void pushDevice(const IValue& ivalue); +#ifdef USE_DISTRIBUTED void pushRRef(const IValue& ivalue); +#endif // unmemoized version void pushStringImpl(const std::string& string); void pushStorageOfTensor(const at::Tensor& tensor); diff --git a/torch/csrc/jit/serialization/unpickler.h b/torch/csrc/jit/serialization/unpickler.h index 208cf554ad2bb..702a1d8816e7f 100644 --- a/torch/csrc/jit/serialization/unpickler.h +++ b/torch/csrc/jit/serialization/unpickler.h @@ -140,7 +140,9 @@ class TORCH_API Unpickler { void rebuildParameter(); void rebuildTensorFromTypeV2(); void rebuildSparseTensor(); +#ifdef USE_DISTRIBUTED void rebuildRRef(); +#endif PickleOpCode readInstruction(); PickleOpCode readOpCode() { return static_cast(read()); diff --git a/torch/csrc/profiler/standalone/execution_trace_observer.cpp b/torch/csrc/profiler/standalone/execution_trace_observer.cpp index e46c141cd3f4d..1c88e80d4021c 100644 --- a/torch/csrc/profiler/standalone/execution_trace_observer.cpp +++ b/torch/csrc/profiler/standalone/execution_trace_observer.cpp @@ -30,12 +30,15 @@ #include #include +#ifdef USE_DISTRIBUTED #include +#endif // USE_DISTRIBUTED using namespace at; // Collective property attributes // 
https://github.com/pytorch/pytorch/issues/124674 +#ifdef USE_DISTRIBUTED constexpr auto kETCommsName = "collective_name"; constexpr auto kETInMsgNelems = "in_msg_nelems"; constexpr auto kETOutMsgNelems = "out_msg_nelems"; @@ -46,6 +49,7 @@ constexpr auto kETGlobalRankStride = "global_rank_stride"; constexpr auto kETGroupSize = "pg_size"; constexpr auto kETProcessGroupName = "pg_name"; constexpr auto kETProcessGroupDesc = "pg_desc"; +#endif // USE_DISTRIBUTED namespace torch::profiler::impl { @@ -265,6 +269,7 @@ static std::ofstream openOutputFile(const std::string& name) { return stream; } +#ifdef USE_DISTRIBUTED static std::string getAttrJson( const std::string& name, const std::string& type, @@ -277,6 +282,7 @@ static std::string getAttrJson( type, value); } +#endif static void writeJsonNode( std::ofstream& out, @@ -654,6 +660,7 @@ static void handleKernelBackendInfo( inline std::string getCommsNodeAttrs(const RecordFunction& fn) { // NOLINT std::vector attrs; +#ifdef USE_DISTRIBUTED // We rely on paramcommsdebug object that is available in thread local info auto debugInfo = dynamic_cast( c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PARAM_COMMS_INFO)); @@ -697,6 +704,8 @@ inline std::string getCommsNodeAttrs(const RecordFunction& fn) { // NOLINT addAttr(kGroupSize, kETGroupSize, "uint64"); +#endif // USE_DISTRIBUTED + // XXX consider using as string stream? return attrs.empty() ? "" : fmt::format(", {}", fmt::join(attrs, ", ")); } diff --git a/torch/csrc/profiler/util.cpp b/torch/csrc/profiler/util.cpp index e97699a99fd1c..0b2979e6fb7ea 100644 --- a/torch/csrc/profiler/util.cpp +++ b/torch/csrc/profiler/util.cpp @@ -11,7 +11,9 @@ #ifdef USE_KINETO #include #endif +#ifdef USE_DISTRIBUTED #include +#endif // USE_DISTRIBUTED namespace torch::profiler::impl { @@ -453,7 +455,7 @@ std::unordered_map saveNcclMeta( // @lint-ignore CLANGTIDY const SaveNcclMetaConfig& config) { std::unordered_map map; -#if !defined(BUILD_LITE_INTERPRETER) && !defined(C10_MOBILE) +#ifdef USE_DISTRIBUTED auto debugInfo = dynamic_cast( c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PARAM_COMMS_INFO)); @@ -563,7 +565,7 @@ std::unordered_map saveNcclMeta( } } } -#endif // !defined(BUILD_LITE_INTERPRETER) && !defined(C10_MOBILE) +#endif // USE_DISTRIBUTED return map; } diff --git a/torch/csrc/profiler/util.h b/torch/csrc/profiler/util.h index dcb4b866a2de3..f2ae57fa0e591 100644 --- a/torch/csrc/profiler/util.h +++ b/torch/csrc/profiler/util.h @@ -185,6 +185,7 @@ struct HashCombine { } }; +#ifdef USE_DISTRIBUTED constexpr auto kCommsName = "Collective name"; constexpr auto kDtype = "dtype"; constexpr auto kInMsgNelems = "In msg nelems"; @@ -202,5 +203,6 @@ constexpr auto kP2pSrc = "Src Rank"; constexpr auto kP2pDst = "Dst Rank"; constexpr auto kInTensorsStart = "Input Tensors start"; constexpr auto kOutTensorsStart = "Output Tensors start"; +#endif // USE_DISTRIBUTED } // namespace torch::profiler::impl diff --git a/torch/distributed/_C_stubs.py b/torch/distributed/_C_stubs.py deleted file mode 100644 index b241006372b6a..0000000000000 --- a/torch/distributed/_C_stubs.py +++ /dev/null @@ -1,150 +0,0 @@ -# mypy: allow-untyped-defs -""" -Python stubs for backend-specific distributed components. - -Since _C._distributed_c10d always exists now, this module only provides -stubs for backend-specific functionality that may not be available in all builds -(e.g., NCCL, UCC, MPI, Gloo, etc.). 
-""" - -from __future__ import annotations - -from typing import Optional, TYPE_CHECKING - -from torch._C._distributed_c10d import Store - - -if TYPE_CHECKING: - from datetime import timedelta - -import torch - - -# Store classes -class HashStore(Store): - """Stub HashStore for builds without this functionality.""" - - def __init__(self, *args, **kwargs): - self._data = {} - - def set(self, key: str, value: str): - self._data[key] = value - - def get(self, key: str) -> bytes: - return self._data.get(key, "").encode() - - -# Backend-specific process group stubs -class ProcessGroupMPI: - """Stub ProcessGroupMPI for non-MPI builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class ProcessGroupNCCL: - """Stub ProcessGroupNCCL for non-NCCL builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class ProcessGroupGloo: - """Stub ProcessGroupGloo for non-Gloo builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class ProcessGroupUCC: - """Stub ProcessGroupUCC for non-UCC builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class ProcessGroupXCCL: - """Stub ProcessGroupXCCL for non-XCCL builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class _ProcessGroupWrapper: - """Stub _ProcessGroupWrapper for non-Gloo builds.""" - - def __init__(self, process_group, *args, **kwargs): - self._process_group = process_group - - def __getattr__(self, name): - return getattr(self._process_group, name) - - -# NCCL-specific function stubs -_DEFAULT_PG_NCCL_TIMEOUT: Optional[timedelta] = None - - -def _hash_tensors(tensors): - """Stub function to hash tensors - returns dummy hash.""" - return 0 - - -def _dump_nccl_trace_json( - includeCollectives: Optional[bool] = None, onlyActive: Optional[bool] = None -) -> bytes: - """Stub function that returns empty JSON trace.""" - return b"{}" - - -def _dump_nccl_trace( - includeCollectives: Optional[bool] = None, - includeStackTraces: Optional[bool] = None, - onlyActive: Optional[bool] = None, -) -> bytes: - """Stub function that returns empty pickle trace.""" - return b"" - - -# NVSHMEM/SymmetricMemory stubs -def _is_nvshmem_available() -> bool: - """Stub function that returns False indicating NVSHMEM is not available.""" - return False - - -def _nvshmemx_cumodule_init(module: int) -> None: - """Stub function for NVSHMEM CU module initialization.""" - - -class _SymmetricMemory: - """Stub _SymmetricMemory class for builds without this functionality.""" - - def __init__(self, *args, **kwargs): - pass - - @classmethod - def empty_strided_p2p(cls, size, stride, dtype, device, group_name=None): - """Stub that returns a regular tensor.""" - return torch.empty(size, dtype=dtype, device=device) - - @classmethod - def rendezvous(cls, tensor, group_name=None): - """Stub that returns None.""" - return None - - @classmethod - def set_group_info(cls, *args, **kwargs): - """Stub that does nothing.""" - - @classmethod - def set_backend(cls, name): - """Stub that does nothing.""" - - @classmethod - def get_backend(cls, device): - """Stub that returns None.""" - return None - - @classmethod - def has_multicast_support(cls, device_type, device_index): - """Stub that returns False.""" - return False diff --git a/torch/distributed/__init__.py b/torch/distributed/__init__.py index 836b00c51c3a4..38e2fdbee803a 100644 --- a/torch/distributed/__init__.py +++ b/torch/distributed/__init__.py @@ -14,10 +14,16 @@ def is_available() -> bool: """ - Always returns ``True``. 
Note that even if distributed is available, - there may not necessarily be any usable backends. + Return ``True`` if the distributed package is available. + + Otherwise, + ``torch.distributed`` does not expose any other APIs. Currently, + ``torch.distributed`` is available on Linux, MacOS and Windows. Set + ``USE_DISTRIBUTED=1`` to enable it when building PyTorch from source. + Currently, the default value is ``USE_DISTRIBUTED=1`` for Linux and Windows, + ``USE_DISTRIBUTED=0`` for MacOS. """ - return True + return hasattr(torch._C, "_c10d_init") if is_available() and not torch._C._c10d_init(): @@ -30,124 +36,132 @@ def is_available() -> bool: DistStoreError = torch._C._DistStoreError QueueEmptyError = torch._C._DistQueueEmptyError -from torch.distributed._distributed_c10d import ( - _broadcast_coalesced, - _compute_bucket_assignment_by_size, - _ControlCollectives, - _DEFAULT_FIRST_BUCKET_BYTES, - _make_nccl_premul_sum, - _register_builtin_comm_hook, - _register_comm_hook, - _StoreCollectives, - _test_python_store, - _verify_params_across_processes, - Backend as _Backend, - BuiltinCommHookType, - DebugLevel, - FileStore, - get_debug_level, - GradBucket, - Logger, - PrefixStore, - ProcessGroup as ProcessGroup, - Reducer, - set_debug_level, - set_debug_level_from_env, - Store, - TCPStore, - Work as _Work, -) - - -class _DistributedPdb(pdb.Pdb): - """ - Supports using PDB from inside a multiprocessing child process. - - Usage: - _DistributedPdb().set_trace() - """ - - def interaction(self, *args, **kwargs): - _stdin = sys.stdin +if is_available(): + from torch._C._distributed_c10d import ( + _broadcast_coalesced, + _compute_bucket_assignment_by_size, + _ControlCollectives, + _DEFAULT_FIRST_BUCKET_BYTES, + _make_nccl_premul_sum, + _register_builtin_comm_hook, + _register_comm_hook, + _StoreCollectives, + _test_python_store, + _verify_params_across_processes, + Backend as _Backend, + BuiltinCommHookType, + DebugLevel, + FileStore, + get_debug_level, + GradBucket, + Logger, + PrefixStore, + ProcessGroup as ProcessGroup, + Reducer, + set_debug_level, + set_debug_level_from_env, + Store, + TCPStore, + Work as _Work, + ) + + class _DistributedPdb(pdb.Pdb): + """ + Supports using PDB from inside a multiprocessing child process. + + Usage: + _DistributedPdb().set_trace() + """ + + def interaction(self, *args, **kwargs): + _stdin = sys.stdin + try: + sys.stdin = open("/dev/stdin") + pdb.Pdb.interaction(self, *args, **kwargs) + finally: + sys.stdin = _stdin + + _breakpoint_cache: dict[int, typing.Any] = {} + + def breakpoint(rank: int = 0, skip: int = 0, timeout_s=3600): + """ + Set a breakpoint, but only on a single rank. All other ranks will wait for you to be + done with the breakpoint before continuing. + + Args: + rank (int): Which rank to break on. Default: ``0`` + skip (int): Skip the first ``skip`` calls to this breakpoint. Default: ``0``. + """ + if skip > 0: + key = hash(str(traceback.format_exc())) + counter = _breakpoint_cache.get(key, 0) + 1 + _breakpoint_cache[key] = counter + if counter <= skip: + log.warning("Skip the breakpoint, counter=%d", counter) + return + + # avoid having the default timeout (if short) interrupt your debug session + if timeout_s is not None: + for group in torch.distributed.distributed_c10d._pg_map: + torch.distributed.distributed_c10d._set_pg_timeout( + timedelta(seconds=timeout_s), group + ) + + if get_rank() == rank: + pdb = _DistributedPdb() + pdb.message( + "\n!!! 
ATTENTION !!!\n\n" + f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n" + ) + pdb.set_trace() + # If Meta/Python keys are in the TLS, we want to make sure that we ignore them + # and hit the (default) CPU/CUDA implementation of barrier. + meta_in_tls = torch._C._meta_in_tls_dispatch_include() + guard = torch._C._DisableTorchDispatch() # type: ignore[attr-defined] + torch._C._set_meta_in_tls_dispatch_include(False) try: - sys.stdin = open("/dev/stdin") - pdb.Pdb.interaction(self, *args, **kwargs) + barrier() finally: - sys.stdin = _stdin - - -_breakpoint_cache: dict[int, typing.Any] = {} - - -def breakpoint(rank: int = 0, skip: int = 0, timeout_s=3600): - """ - Set a breakpoint, but only on a single rank. All other ranks will wait for you to be - done with the breakpoint before continuing. - - Args: - rank (int): Which rank to break on. Default: ``0`` - skip (int): Skip the first ``skip`` calls to this breakpoint. Default: ``0``. - """ - if skip > 0: - key = hash(str(traceback.format_exc())) - counter = _breakpoint_cache.get(key, 0) + 1 - _breakpoint_cache[key] = counter - if counter <= skip: - log.warning("Skip the breakpoint, counter=%d", counter) - return - - # avoid having the default timeout (if short) interrupt your debug session - if timeout_s is not None: - for group in torch.distributed.distributed_c10d._pg_map: - torch.distributed.distributed_c10d._set_pg_timeout( - timedelta(seconds=timeout_s), group - ) - - if get_rank() == rank: - pdb = _DistributedPdb() - pdb.message( - "\n!!! ATTENTION !!!\n\n" - f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n" - ) - pdb.set_trace() - # If Meta/Python keys are in the TLS, we want to make sure that we ignore them - # and hit the (default) CPU/CUDA implementation of barrier. - meta_in_tls = torch._C._meta_in_tls_dispatch_include() - guard = torch._C._DisableTorchDispatch() # type: ignore[attr-defined] - torch._C._set_meta_in_tls_dispatch_include(False) - try: - barrier() - finally: - torch._C._set_meta_in_tls_dispatch_include(meta_in_tls) - del guard - - -if sys.platform != "win32": - from torch.distributed._distributed_c10d import HashStore - -from .device_mesh import DeviceMesh, init_device_mesh - -# Variables prefixed with underscore are not auto imported -# See the comment in `distributed_c10d.py` above `_backend` on why we expose -# this. -from .distributed_c10d import * # noqa: F403 -from .distributed_c10d import ( - _all_gather_base, - _coalescing_manager, - _CoalescingManager, - _create_process_group_wrapper, - _get_process_group_name, - _rank_not_in_group, - _reduce_scatter_base, - _time_estimator, - get_node_local_rank, -) -from .remote_device import _remote_device -from .rendezvous import ( - _create_store_from_options, - register_rendezvous_handler, - rendezvous, -) - - -set_debug_level_from_env() + torch._C._set_meta_in_tls_dispatch_include(meta_in_tls) + del guard + + if sys.platform != "win32": + from torch._C._distributed_c10d import HashStore + + from .device_mesh import DeviceMesh, init_device_mesh + + # Variables prefixed with underscore are not auto imported + # See the comment in `distributed_c10d.py` above `_backend` on why we expose + # this. 
+ from .distributed_c10d import * # noqa: F403 + from .distributed_c10d import ( + _all_gather_base, + _coalescing_manager, + _CoalescingManager, + _create_process_group_wrapper, + _get_process_group_name, + _rank_not_in_group, + _reduce_scatter_base, + _time_estimator, + get_node_local_rank, + ) + from .remote_device import _remote_device + from .rendezvous import ( + _create_store_from_options, + register_rendezvous_handler, + rendezvous, + ) + + set_debug_level_from_env() + +else: + # This stub is sufficient to get + # python test/test_public_bindings.py -k test_correct_module_names + # working even when USE_DISTRIBUTED=0. Feel free to add more + # stubs as necessary. + # We cannot define stubs directly because they confuse pyre + + class _ProcessGroupStub: + pass + + sys.modules["torch.distributed"].ProcessGroup = _ProcessGroupStub # type: ignore[attr-defined] diff --git a/torch/distributed/_dist2.py b/torch/distributed/_dist2.py index 1c27bf55d6834..ce5cb8d7e0cc3 100644 --- a/torch/distributed/_dist2.py +++ b/torch/distributed/_dist2.py @@ -10,7 +10,7 @@ from typing import Protocol, Union import torch -from torch.distributed._distributed_c10d import ( +from torch._C._distributed_c10d import ( _current_process_group, _set_process_group, ProcessGroup, diff --git a/torch/distributed/_distributed_c10d.py b/torch/distributed/_distributed_c10d.py deleted file mode 100644 index beb7830edc1da..0000000000000 --- a/torch/distributed/_distributed_c10d.py +++ /dev/null @@ -1,245 +0,0 @@ -# mypy: disable-error-code="assignment" -# noqa: F401 -""" -Centralized module for importing and re-exporting torch._C._distributed_c10d components. - -IMPORTANT PATTERN: -Never access torch._C._distributed_c10d directly in code. Always import from and use -torch.distributed._distributed_c10d which is guaranteed to have all functions available. 
- -Example: - # WRONG: torch._C._distributed_c10d._set_global_rank(rank) - # RIGHT: - from torch.distributed._distributed_c10d import _set_global_rank - _set_global_rank(rank) -""" - -from typing import TYPE_CHECKING - -# Import all core distributed components from the C extension -# NB: This list has to be spelled out because the _C module doesn't have __all__ -from torch._C._distributed_c10d import ( - _allow_inflight_collective_as_graph_input, - _broadcast_coalesced, - _compute_bucket_assignment_by_size, - _ControlCollectives, - _current_process_group, - _DEFAULT_FIRST_BUCKET_BYTES, - _DEFAULT_PG_TIMEOUT, - _DistributedBackendOptions, - _make_nccl_premul_sum, - _register_builtin_comm_hook, - _register_comm_hook, - _register_process_group, - _register_work, - _resolve_process_group, - _set_allow_inflight_collective_as_graph_input, - _set_global_rank, - _set_process_group, - _StoreCollectives, - _test_python_store, - _unregister_all_process_groups, - _unregister_process_group, - _verify_params_across_processes, - _WorkerServer, - AllgatherOptions, - AllreduceCoalescedOptions, - AllreduceOptions, - AllToAllOptions, - Backend, - BarrierOptions, - BroadcastOptions, - BuiltinCommHookType, - DebugLevel, - FakeProcessGroup, - FakeWork, - FileStore, - GatherOptions, - get_debug_level, - GradBucket, - Logger, - PrefixStore, - ProcessGroup, - ReduceOp, - ReduceOptions, - Reducer, - ReduceScatterOptions, - ScatterOptions, - set_debug_level, - set_debug_level_from_env, - Store, - TCPStore, - Work, -) - - -# Backend-specific components that may not be available -_MPI_AVAILABLE = False -_NCCL_AVAILABLE = False -_GLOO_AVAILABLE = False -_UCC_AVAILABLE = False -_XCCL_AVAILABLE = False - -# HashStore -try: - from torch._C._distributed_c10d import HashStore -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import HashStore - -# NVSHMEM/SymmetricMemory components - -# There are multiple backends for SymmetricMemory, as a result, -# _SymmetricMemory should not be imported together with NVSHMEM related modules. 
-try: - from torch._C._distributed_c10d import _SymmetricMemory -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import _SymmetricMemory - -try: - from torch._C._distributed_c10d import ( - _is_nvshmem_available, - _nvshmemx_cumodule_init, - ) -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ( - _is_nvshmem_available, - _nvshmemx_cumodule_init, - ) - -# MPI backend -try: - from torch._C._distributed_c10d import ProcessGroupMPI - - _MPI_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ProcessGroupMPI - -# NCCL backend -try: - from torch._C._distributed_c10d import ( - _DEFAULT_PG_NCCL_TIMEOUT, - _dump_nccl_trace, - _dump_nccl_trace_json, - _hash_tensors, - ProcessGroupNCCL, - ) - - _NCCL_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ( - _DEFAULT_PG_NCCL_TIMEOUT, - _dump_nccl_trace, - _dump_nccl_trace_json, - _hash_tensors, - ProcessGroupNCCL, - ) - -# Gloo backend -try: - from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo - - _GLOO_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import _ProcessGroupWrapper, ProcessGroupGloo - -# UCC backend -try: - from torch._C._distributed_c10d import ProcessGroupUCC - - _UCC_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ProcessGroupUCC - -# XCCL backend -try: - from torch._C._distributed_c10d import ProcessGroupXCCL - - _XCCL_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ProcessGroupXCCL - -# Provide backwards compatibility by making all symbols available at module level -__all__ = [ - # Basic components - "_broadcast_coalesced", - "_compute_bucket_assignment_by_size", - "_ControlCollectives", - "_DEFAULT_FIRST_BUCKET_BYTES", - "_DEFAULT_PG_TIMEOUT", - "_DEFAULT_PG_NCCL_TIMEOUT", - "_make_nccl_premul_sum", - "_register_builtin_comm_hook", - "_register_comm_hook", - "_StoreCollectives", - "_test_python_store", - "_verify_params_across_processes", - "_allow_inflight_collective_as_graph_input", - "_register_work", - "_set_allow_inflight_collective_as_graph_input", - "_is_nvshmem_available", - "_nvshmemx_cumodule_init", - "_SymmetricMemory", - "_hash_tensors", - "_set_global_rank", - "_dump_nccl_trace", - "_dump_nccl_trace_json", - "Backend", - "BuiltinCommHookType", - "DebugLevel", - "FakeProcessGroup", - "FileStore", - "get_debug_level", - "GradBucket", - "HashStore", - "Logger", - "PrefixStore", - "ProcessGroup", - "Reducer", - "ReduceOp", - "set_debug_level", - "set_debug_level_from_env", - "Store", - "TCPStore", - "Work", - "FakeWork", - # Additional distributed_c10d components - "_DistributedBackendOptions", - "_register_process_group", - "_resolve_process_group", - "_unregister_all_process_groups", - "_unregister_process_group", - "_current_process_group", - "_set_process_group", - "_WorkerServer", - "AllgatherOptions", - "AllreduceCoalescedOptions", - "AllreduceOptions", - "AllToAllOptions", - "BarrierOptions", - "BroadcastOptions", - "GatherOptions", - "ReduceOptions", - "ReduceScatterOptions", - "ScatterOptions", - # Process group implementations - "ProcessGroupMPI", - "ProcessGroupNCCL", - "ProcessGroupGloo", - "ProcessGroupUCC", - "ProcessGroupXCCL", - "_ProcessGroupWrapper", - # Availability flags - "_MPI_AVAILABLE", - "_NCCL_AVAILABLE", - "_GLOO_AVAILABLE", - "_UCC_AVAILABLE", - 
"_XCCL_AVAILABLE", -] diff --git a/torch/distributed/_functional_collectives.py b/torch/distributed/_functional_collectives.py index 95feb6cd79714..c893794fc3011 100644 --- a/torch/distributed/_functional_collectives.py +++ b/torch/distributed/_functional_collectives.py @@ -7,10 +7,6 @@ import torch import torch.distributed as dist import torch.distributed.distributed_c10d as c10d -from torch.distributed._distributed_c10d import ( - _allow_inflight_collective_as_graph_input, - _set_allow_inflight_collective_as_graph_input, -) from torch.distributed.device_mesh import DeviceMesh from torch.fx.experimental.proxy_tensor import get_proxy_mode @@ -862,13 +858,15 @@ def all_reduce_wait_compiled(y): will be registered in the work registry, and the wait_tensor() in compiled region called on the output tensor of the collective will wait on the correct work object. """ - previous = _allow_inflight_collective_as_graph_input() + previous = torch._C._distributed_c10d._allow_inflight_collective_as_graph_input() try: - _set_allow_inflight_collective_as_graph_input(value) + torch._C._distributed_c10d._set_allow_inflight_collective_as_graph_input(value) yield finally: - _set_allow_inflight_collective_as_graph_input(previous) + torch._C._distributed_c10d._set_allow_inflight_collective_as_graph_input( + previous + ) def _make_all_gather_out_tensor(input, group_size): diff --git a/torch/distributed/_shard/sharded_tensor/reshard.py b/torch/distributed/_shard/sharded_tensor/reshard.py index 2bc3d65e5c8cb..daef9c3586184 100644 --- a/torch/distributed/_shard/sharded_tensor/reshard.py +++ b/torch/distributed/_shard/sharded_tensor/reshard.py @@ -4,7 +4,7 @@ import torch import torch.distributed as dist import torch.distributed._shard.sharding_spec as shard_spec -from torch.distributed._distributed_c10d import ProcessGroup +from torch._C._distributed_c10d import ProcessGroup from torch.distributed._shard.metadata import ShardMetadata from torch.distributed._shard.sharding_spec._internals import ( get_chunked_dim_size, diff --git a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py index f02563619d2fa..61808d0adf62a 100644 --- a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py +++ b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py @@ -4,7 +4,7 @@ import torch import torch.distributed as dist -from torch.distributed._distributed_c10d import ReduceOp +from torch._C._distributed_c10d import ReduceOp from torch.distributed._shard.sharded_tensor import ShardedTensor from torch.distributed._shard.sharding_spec import ChunkShardingSpec from torch.distributed._shard.sharding_spec.api import custom_sharding_spec_op diff --git a/torch/distributed/_symmetric_memory/__init__.py b/torch/distributed/_symmetric_memory/__init__.py index 8154cd9809139..43c2959fdd8d1 100644 --- a/torch/distributed/_symmetric_memory/__init__.py +++ b/torch/distributed/_symmetric_memory/__init__.py @@ -15,12 +15,7 @@ import torch.distributed._functional_collectives as funcol import torch.distributed.distributed_c10d as c10d from torch._C._autograd import DeviceType -from torch.distributed._distributed_c10d import ( - _register_work, - _SymmetricMemory, - ProcessGroup, - Work as _Work, -) +from torch._C._distributed_c10d import _SymmetricMemory, Work as _Work _group_name_to_store: dict[str, c10d.Store] = {} @@ -1493,7 +1488,7 @@ def _low_contention_all_gather( src_buf = 
symm_mem.get_buffer(remote_rank, tensor.shape, tensor.dtype) chunks[remote_rank].copy_(src_buf) symm_mem.barrier() - _register_work(output, Work()) + torch._C._distributed_c10d._register_work(output, Work()) return output @@ -1541,7 +1536,7 @@ def _low_contention_reduce_scatter_with_symm_mem_input( ret = ret.mean(dim=0) else: raise ValueError(f"reduce_op ({reduce_op}) is not supported") - _register_work(ret, Work()) + torch._C._distributed_c10d._register_work(ret, Work()) return ret @@ -1576,7 +1571,7 @@ def _low_contention_reduce_scatter_with_workspace( ret = ret.mean(dim=0) else: raise ValueError(f"reduce_op ({reduce_op}) is not supported") - _register_work(ret, Work()) + torch._C._distributed_c10d._register_work(ret, Work()) return ret @@ -1654,6 +1649,7 @@ def _all_to_all_vdev_2d_offset_meta( if TYPE_CHECKING: + from torch._C._distributed_c10d import ProcessGroup from torch.types import _device, _dtype, _int @@ -1731,6 +1727,8 @@ def rendezvous( group (Union[str, :class:`torch.distributed.ProcessGroup`]): The group identifying the participating processes. This can be either a group name or a process group object. """ + from torch._C._distributed_c10d import ProcessGroup + if isinstance(group, str): group_name = group elif isinstance(group, ProcessGroup): @@ -1748,7 +1746,11 @@ def is_nvshmem_available() -> bool: Check if NVSHMEM is available in current build and on current system. """ - from torch.distributed._distributed_c10d import _is_nvshmem_available + try: + from torch._C._distributed_c10d import _is_nvshmem_available + except ImportError: + # Not all builds have NVSHMEM support. + return False # Check if NVSHMEM is available on current system. return _is_nvshmem_available() diff --git a/torch/distributed/_symmetric_memory/_nvshmem_triton.py b/torch/distributed/_symmetric_memory/_nvshmem_triton.py index 7b7828227d7d1..c543fdffc1c76 100644 --- a/torch/distributed/_symmetric_memory/_nvshmem_triton.py +++ b/torch/distributed/_symmetric_memory/_nvshmem_triton.py @@ -75,7 +75,7 @@ def enable_triton(lib_dir: Optional[str] = None) -> dict[str, str]: """ import triton - from torch.distributed._distributed_c10d import _nvshmemx_cumodule_init + from torch._C._distributed_c10d import _nvshmemx_cumodule_init if lib_dir is not None: lib_path = os.path.join(lib_dir, "libnvshmem_device.bc") diff --git a/torch/distributed/_tools/fake_collectives.py b/torch/distributed/_tools/fake_collectives.py index b89970ab33480..3b201b395334b 100644 --- a/torch/distributed/_tools/fake_collectives.py +++ b/torch/distributed/_tools/fake_collectives.py @@ -2,9 +2,7 @@ from typing import Any import torch - -# Import centralized distributed components -from torch.distributed._distributed_c10d import ( +from torch._C._distributed_c10d import ( _resolve_process_group, FakeWork, ProcessGroup, diff --git a/torch/distributed/algorithms/model_averaging/utils.py b/torch/distributed/algorithms/model_averaging/utils.py index 3e3243002a9c0..fa8cc184eddc5 100644 --- a/torch/distributed/algorithms/model_averaging/utils.py +++ b/torch/distributed/algorithms/model_averaging/utils.py @@ -5,6 +5,10 @@ import torch import torch.distributed as dist + +# The two imports below are not always available depending on the +# USE_DISTRIBUTED compile flag. Make sure they raise import error +# if we're trying to use them. 
from torch.distributed import group, ProcessGroup diff --git a/torch/distributed/constants.py b/torch/distributed/constants.py index bfa8785218645..c1e604bc86753 100644 --- a/torch/distributed/constants.py +++ b/torch/distributed/constants.py @@ -1,11 +1,7 @@ from datetime import timedelta from typing import Optional -# Import from centralized fallback module - no ImportError handling needed -from torch.distributed._distributed_c10d import ( - _DEFAULT_PG_NCCL_TIMEOUT, - _DEFAULT_PG_TIMEOUT, -) +from torch._C._distributed_c10d import _DEFAULT_PG_TIMEOUT __all__ = ["default_pg_timeout", "default_pg_nccl_timeout"] @@ -20,4 +16,11 @@ # Later, we could consider merging them back together at the c++ layer if we can align on a same value. # (only if TORCH_NCCL_BLOCKING_WAIT or TORCH_NCCL_ASYNC_ERROR_HANDLING is set to 1). -default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT +try: + from torch._C._distributed_c10d import _DEFAULT_PG_NCCL_TIMEOUT + + default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT +except ImportError: + # if C++ NCCL support is not compiled, we don't have access to the default nccl value. + # if anyone is actually trying to use nccl in this state, it should error. + default_pg_nccl_timeout = None diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py index 6564acc303812..1907f730c2e64 100644 --- a/torch/distributed/device_mesh.py +++ b/torch/distributed/device_mesh.py @@ -11,14 +11,35 @@ from typing import Optional, TYPE_CHECKING, Union import torch +from torch.distributed import is_available from torch.utils._typing_utils import not_none __all__ = ["init_device_mesh", "DeviceMesh"] -if True: # just to temporarily avoid reindentation - from torch.distributed._distributed_c10d import Backend as C10dBackend +if not is_available(): + import sys + + # We need to create the stubs when distributed is not available. + # Otherwise, we would fail the doc tests (```./.ci/pytorch/docs-test.sh```), + # since it would try to import ``torch.distributed.device_mesh`` or + # ``torch.distributed.init_device_mesh`` but cannot find them. + + class _DeviceMeshStub: + pass + + def _init_device_mesh_stub(): + pass + + sys.modules["torch.distributed.device_mesh"].DeviceMesh = _DeviceMeshStub # type: ignore[attr-defined] + sys.modules[ + "torch.distributed.device_mesh" + ].init_device_mesh = _init_device_mesh_stub # type: ignore[attr-defined] + + +else: + from torch._C._distributed_c10d import Backend as C10dBackend from torch.distributed.distributed_c10d import ( _get_default_group, _resolve_process_group, @@ -512,16 +533,15 @@ def _setup_world_group_and_device(self): # heuristic to set the current cuda/cuda-like device base on num of gpu devices available in each host # NOTE: This device selection would only work for homogeneous hardware. num_devices_per_host = device_handle.device_count() - if num_devices_per_host: - if ( - world_size > num_devices_per_host - and world_size % num_devices_per_host != 0 - ): - raise RuntimeError( - f"DeviceMesh only support homogeneous hardware, but found " - f"{world_size} ranks and {num_devices_per_host} {self.device_type} devices!" - ) - device_handle.set_device(get_rank() % num_devices_per_host) + if ( + world_size > num_devices_per_host + and world_size % num_devices_per_host != 0 + ): + raise RuntimeError( + f"DeviceMesh only support homogeneous hardware, but found " + f"{world_size} ranks and {num_devices_per_host} {self.device_type} devices!" 
+ ) + device_handle.set_device(get_rank() % num_devices_per_host) return _get_default_group() diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 40660b41fe3eb..14790e5dba8af 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -19,21 +19,13 @@ from typing_extensions import deprecated import torch -import torch.distributed._distributed_c10d as _c10d from torch._C import _DistStoreError as DistStoreError -from torch._utils_internal import set_pytorch_distributed_envs_from_justknobs -from torch.distributed._distributed_c10d import ( # Process group implementations; Availability flags +from torch._C._distributed_c10d import ( _DistributedBackendOptions, - _GLOO_AVAILABLE, - _MPI_AVAILABLE, - _NCCL_AVAILABLE, - _ProcessGroupWrapper, _register_process_group, _resolve_process_group, - _UCC_AVAILABLE, _unregister_all_process_groups, _unregister_process_group, - _XCCL_AVAILABLE, AllgatherOptions, AllreduceCoalescedOptions, AllreduceOptions, @@ -45,11 +37,6 @@ get_debug_level, PrefixStore, ProcessGroup, - ProcessGroupGloo, - ProcessGroupMPI, - ProcessGroupNCCL, - ProcessGroupUCC, - ProcessGroupXCCL, ReduceOp, ReduceOptions, ReduceScatterOptions, @@ -57,6 +44,7 @@ Store, Work, ) +from torch._utils_internal import set_pytorch_distributed_envs_from_justknobs from torch.monitor import _WaitCounter from torch.overrides import handle_torch_function, has_torch_function from torch.utils._typing_utils import not_none @@ -143,11 +131,17 @@ "split_group", ] +_MPI_AVAILABLE = True +_NCCL_AVAILABLE = True +_GLOO_AVAILABLE = True +_UCC_AVAILABLE = True +_XCCL_AVAILABLE = True + _pickler = pickle.Pickler _unpickler = pickle.Unpickler -# Change __module__ of all imported types from the distributed wrapper that are public +# Change __module__ of all imported types from torch._C._distributed_c10d that are public def _export_c_types() -> None: _public_types_to_change_module = [ AllreduceCoalescedOptions, @@ -173,26 +167,45 @@ def _export_c_types() -> None: _export_c_types() -# Add process groups to __all__ and set their module based on availability -if _MPI_AVAILABLE: +try: + from torch._C._distributed_c10d import ProcessGroupMPI + ProcessGroupMPI.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupMPI"] +except ImportError: + _MPI_AVAILABLE = False + +try: + from torch._C._distributed_c10d import ProcessGroupNCCL -if _NCCL_AVAILABLE: ProcessGroupNCCL.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupNCCL"] +except ImportError: + _NCCL_AVAILABLE = False + +try: + from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo -if _GLOO_AVAILABLE: ProcessGroupGloo.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupGloo"] +except ImportError: + _GLOO_AVAILABLE = False + +try: + from torch._C._distributed_c10d import ProcessGroupUCC -if _UCC_AVAILABLE: ProcessGroupUCC.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupUCC"] +except ImportError: + _UCC_AVAILABLE = False + +try: + from torch._C._distributed_c10d import ProcessGroupXCCL -if _XCCL_AVAILABLE: ProcessGroupXCCL.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupXCCL"] +except ImportError: + _XCCL_AVAILABLE = False logger = logging.getLogger(__name__) @@ -1312,8 +1325,7 @@ def _get_default_store() -> Store: def _update_default_pg(pg) -> None: _world.default_pg = pg rank = pg.rank() if pg is not None and pg != GroupMember.NON_GROUP_MEMBER 
else -1 - - _c10d._set_global_rank(rank) + torch._C._distributed_c10d._set_global_rank(rank) def get_backend_config(group: Optional[ProcessGroup] = None) -> str: @@ -1950,7 +1962,7 @@ def _new_process_group_helper( if device_id: pg.bound_device_id = device_id - backend_class: _c10d.Backend + backend_class: torch._C._distributed_c10d.Backend for device, backend_str in backend_config.get_device_backend_map().items(): # Use the group name as prefix in the default store, such that # a single store can be reused by multiple groups. @@ -3065,9 +3077,7 @@ def _object_to_tensor(obj, device, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) if backend == Backend.NCCL: - from torch.distributed._distributed_c10d import _hash_tensors - - hash = _hash_tensors([byte_tensor]) + hash = torch._C._distributed_c10d._hash_tensors([byte_tensor]) logger.warning( "_object_to_tensor size: %s hash value: %s", byte_tensor.numel(), @@ -3082,9 +3092,7 @@ def _tensor_to_object(tensor, tensor_size, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) if backend == Backend.NCCL: - from torch.distributed._distributed_c10d import _hash_tensors - - hash = _hash_tensors([tensor]) + hash = torch._C._distributed_c10d._hash_tensors([tensor]) logger.warning( "_tensor_to_object size: %s hash value: %s", tensor.numel(), hash ) @@ -4961,7 +4969,7 @@ def monitored_barrier( def _create_process_group_wrapper( - wrapped_pg: _c10d.Backend, + wrapped_pg: torch._C._distributed_c10d.Backend, store_prefix: str, store: Store, rank: int, diff --git a/torch/distributed/elastic/control_plane.py b/torch/distributed/elastic/control_plane.py index 63334a0ca3f62..817255edd23dc 100644 --- a/torch/distributed/elastic/control_plane.py +++ b/torch/distributed/elastic/control_plane.py @@ -14,7 +14,7 @@ @contextmanager def _worker_server(socket_path: str) -> Generator[None, None, None]: - from torch.distributed._distributed_c10d import _WorkerServer + from torch._C._distributed_c10d import _WorkerServer server = _WorkerServer(socket_path) try: diff --git a/torch/distributed/nn/functional.py b/torch/distributed/nn/functional.py index 2bdf3fe2bdffd..eeff877260bcc 100644 --- a/torch/distributed/nn/functional.py +++ b/torch/distributed/nn/functional.py @@ -2,6 +2,10 @@ import torch import torch.distributed as dist from torch.autograd import Function + +# The two imports below are not always available depending on the +# USE_DISTRIBUTED compile flag. Make sure they raise import error +# if we're trying to use them. 
from torch.distributed import group, ReduceOp diff --git a/torch/distributed/rpc/__init__.py b/torch/distributed/rpc/__init__.py index 27a945a92e44c..adf901d6b6e3e 100644 --- a/torch/distributed/rpc/__init__.py +++ b/torch/distributed/rpc/__init__.py @@ -37,6 +37,7 @@ def is_available() -> bool: import numbers import torch.distributed.autograd as dist_autograd + from torch._C._distributed_c10d import Store from torch._C._distributed_rpc import ( # noqa: F401 _cleanup_python_rpc_handler, _DEFAULT_INIT_METHOD, @@ -69,7 +70,6 @@ def is_available() -> bool: RpcBackendOptions, WorkerInfo, ) - from torch.distributed._distributed_c10d import Store if _is_tensorpipe_available: from torch._C._distributed_rpc import ( # noqa: F401 diff --git a/torch/distributed/tensor/_collective_utils.py b/torch/distributed/tensor/_collective_utils.py index f01836c59592b..4fce6fea538a6 100644 --- a/torch/distributed/tensor/_collective_utils.py +++ b/torch/distributed/tensor/_collective_utils.py @@ -8,10 +8,8 @@ import torch import torch.distributed._functional_collectives as funcol import torch.distributed.tensor._dtensor_spec as dtensor_spec +from torch._C._distributed_c10d import _resolve_process_group from torch._logging import warning_once - -# Import from centralized fallback module - no conditional imports needed -from torch.distributed._distributed_c10d import _resolve_process_group from torch.distributed.device_mesh import _mesh_resources, DeviceMesh from torch.distributed.distributed_c10d import ( _get_group_size_by_name, diff --git a/torch/testing/_internal/distributed/fake_pg.py b/torch/testing/_internal/distributed/fake_pg.py index 035a8bb7c586d..0a2814c246459 100644 --- a/torch/testing/_internal/distributed/fake_pg.py +++ b/torch/testing/_internal/distributed/fake_pg.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import torch.distributed as dist -from torch.distributed._distributed_c10d import FakeProcessGroup +from torch._C._distributed_c10d import FakeProcessGroup class FakeStore(dist.Store): From 5ac112b56980dfda80af280d0e8b8d9a65b7eeaa Mon Sep 17 00:00:00 2001 From: Rob Timpe Date: Wed, 10 Sep 2025 00:38:20 +0000 Subject: [PATCH 027/693] [dynamo] Graph break on on user-defined class in compiled region (#161670) Currently, user-defined classes inside of a compiled frame will cause the whole frame to be skipped by dynamo. This change defers the Unsupported exception until the __build_class__ builtin is actually called, which allows a graph break to be inserted. 
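
For illustration only (a hypothetical snippet, not part of this patch), a frame like the one below is no longer skipped wholesale; dynamo now defers the error to the `__build_class__` call, so it graph-breaks at the in-frame class definition and still compiles the surrounding code:

```python
# Hypothetical repro; the names used here are illustrative, not from the patch.
import torch

@torch.compile(backend="eager")
def fn(x):
    # Defining a class inside the compiled frame previously made dynamo skip
    # the whole frame; with this change the Unsupported exception is raised
    # only when __build_class__ actually runs, so a graph break lands here
    # and the rest of fn is still traced.
    class Box:
        def __init__(self, v):
            self.v = v

    return Box(x).v.sin()

print(fn(torch.randn(4)))
```

Running with `TORCH_LOGS=graph_breaks` one would expect a graph-break message pointing at the class definition rather than a skipped-frame message.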
Pull Request resolved: https://github.com/pytorch/pytorch/pull/161670 Approved by: https://github.com/williamwen42, https://github.com/guilhermeleobas --- test/dynamo/test_error_messages.py | 12 ++++++------ test/dynamo/test_misc.py | 16 ++++++++++++++++ ...p_True => AOTFxirTestCase.test_aoti_fx_const} | 0 .../CPython313-test_bool-BoolTest.test_blocked | 0 ...on313-test_bool-BoolTest.test_convert_to_bool | 0 ...t_collections-TestCollectionABCs.test_Mapping | 0 ...tions-TestCollectionABCs.test_Sequence_mixins | 0 ...-test_collections-TestCollectionABCs.test_Set | 0 ...ons-TestCollectionABCs.test_Set_from_iterable | 0 ...ABCs.test_Set_interoperability_with_real_sets | 0 ...ollections-TestCollectionABCs.test_issue16373 | 0 ...st_collections-TestCounter.test_copy_subclass | 0 ...llections-TestOneTrickPonyABCs.test_Generator | 0 ...-test_contextlib-ClosingTestCase.test_closing | 0 ...contextlib-ClosingTestCase.test_closing_error | 0 ...rTestCase.test_contextmanager_except_stopiter | 0 ...textlib-ContextManagerTestCase.test_nokeepref | 0 ...tAbstractContextManager.test_exit_is_abstract | 0 ...textlib-TestAbstractContextManager.test_slots | 0 ...b-TestContextDecorator.test_decorating_method | 0 ...ntextlib-TestContextDecorator.test_typo_enter | 0 ...ontextlib-TestContextDecorator.test_typo_exit | 0 ...-TestExitStack.test_dont_reraise_RuntimeError | 0 ...t_contextlib-TestExitStack.test_enter_context | 0 ...xtlib-TestExitStack.test_enter_context_errors | 0 ...tStack.test_exit_exception_chaining_reference | 0 ...ack.test_exit_exception_explicit_none_context | 0 ...contextlib-TestExitStack.test_instance_bypass | 0 ...on313-test_contextlib-TestExitStack.test_push | 0 ...faultdict-TestDefaultDict.test_recursive_repr | 0 .../CPython313-test_dict-DictTest.test_bad_key | 0 ...st_dict-DictTest.test_copy_maintains_tracking | 0 ...ict-DictTest.test_dict_contain_use_after_free | 0 ...on313-test_dict-DictTest.test_dict_copy_order | 0 .../CPython313-test_dict-DictTest.test_eq | 0 ...ictTest.test_equal_operator_modifying_operand | 0 ...ictTest.test_errors_in_view_containment_check | 0 ...test_fromkeys_operator_modifying_dict_operand | 0 ....test_fromkeys_operator_modifying_set_operand | 0 .../CPython313-test_dict-DictTest.test_getitem | 0 ...3-test_dict-DictTest.test_init_use_after_free | 0 ...tTest.test_instance_dict_getattr_str_subclass | 0 ..._dict-DictTest.test_invalid_keyword_arguments | 0 ...n313-test_dict-DictTest.test_merge_and_mutate | 0 .../CPython313-test_dict-DictTest.test_missing | 0 ...on313-test_dict-DictTest.test_mutating_lookup | 0 ...t_object_set_item_single_instance_non_str_key | 0 ...tTest.test_oob_indexing_dictiter_iternextitem | 0 .../CPython313-test_dict-DictTest.test_pop | 0 .../CPython313-test_dict-DictTest.test_resize2 | 0 ...test_reverse_iterator_for_shared_shared_dicts | 0 ...CPython313-test_dict-DictTest.test_setdefault | 0 ...313-test_dict-DictTest.test_setdefault_atomic | 0 ...t_dict-DictTest.test_setitem_atomic_at_resize | 0 ...Test.test_splittable_to_generic_combinedtable | 0 ...313-test_dict-DictTest.test_splittable_update | 0 ...CPython313-test_dict-DictTest.test_str_nonstr | 0 ...thon313-test_dict-DictTest.test_views_mapping | 0 ..._float-GeneralFloatCases.test_floatconversion | 0 ...t-GeneralFloatCases.test_keywords_in_subclass | 0 ...eneralFloatCases.test_non_numeric_input_types | 0 ...313-test_float-HexFloatTestCase.test_subclass | 0 ...test_int-IntTestCases.test_int_base_indexable | 0 ...nt-IntTestCases.test_int_returns_int_subclass | 0 
...int-IntTestCases.test_int_subclass_with_index | 0 ...t_int-IntTestCases.test_int_subclass_with_int | 0 ...n313-test_int-IntTestCases.test_intconversion | 0 ...int-IntTestCases.test_non_numeric_input_types | 0 .../CPython313-test_iter-TestCase.test_3720 | 0 ...test_iter-TestCase.test_ref_counting_behavior | 0 ...thon313-test_iter-TestCase.test_stop_sequence | 0 ...-test_iter-TestCase.test_unicode_join_endcase | 0 ...ython313-test_list-ListTest.test_constructors | 0 ...hon313-test_list-ListTest.test_contains_order | 0 ...istTest.test_equal_operator_modifying_operand | 0 .../CPython313-test_list-ListTest.test_extend | 0 ...-test_list-ListTest.test_keywords_in_subclass | 0 ...313-test_list-ListTest.test_no_comdat_folding | 0 ...Python313-test_list-ListTest.test_repr_mutate | 0 .../CPython313-test_math-MathTests.testCeil | 0 .../CPython313-test_math-MathTests.testFloor | 0 ...Python313-test_math-MathTests.test_issue39871 | 0 ...on313-test_math-MathTests.test_sumprod_stress | 0 .../CPython313-test_math-MathTests.test_trunc | 0 ...ator-CCOperatorPickleTestCase.test_attrgetter | 0 ...or-CCOperatorPickleTestCase.test_methodcaller | 0 ...st_operator-COperatorTestCase.test_attrgetter | 0 ...on313-test_operator-COperatorTestCase.test_eq | 0 ...13-test_operator-COperatorTestCase.test_index | 0 ...-test_operator-COperatorTestCase.test_inplace | 0 ...t_operator-COperatorTestCase.test_length_hint | 0 ...on313-test_operator-COperatorTestCase.test_ne | 0 ...313-test_operator-COperatorTestCase.test_not_ | 0 ...13-test_operator-COperatorTestCase.test_truth | 0 ...tor-CPyOperatorPickleTestCase.test_attrgetter | 0 ...r-CPyOperatorPickleTestCase.test_methodcaller | 0 ...tor-PyCOperatorPickleTestCase.test_attrgetter | 0 ...r-PyCOperatorPickleTestCase.test_methodcaller | 0 ...t_operator-PyOperatorTestCase.test_attrgetter | 0 ...3-test_operator-PyOperatorTestCase.test_index | 0 ...test_operator-PyOperatorTestCase.test_inplace | 0 ..._operator-PyOperatorTestCase.test_length_hint | 0 ...-test_operator-PyOperatorTestCase.test_matmul | 0 ...operator-PyOperatorTestCase.test_methodcaller | 0 ...13-test_operator-PyOperatorTestCase.test_not_ | 0 ...or-PyPyOperatorPickleTestCase.test_attrgetter | 0 ...-PyPyOperatorPickleTestCase.test_methodcaller | 0 ...nBuiltinDictTests.test_delitem_hash_collision | 0 ...nBuiltinDictTests.test_highly_nested_subclass | 0 ...DictSubclassTests.test_delitem_hash_collision | 0 ...DictSubclassTests.test_highly_nested_subclass | 0 ...ythonOrderedDictSubclassTests.test_init_calls | 0 ....test_issue119004_change_linked_list_by_clear | 0 ..._issue119004_change_linked_list_by_delete_key | 0 ...ssTests.test_issue119004_change_size_by_clear | 0 ...ts.test_issue119004_change_size_by_delete_key | 0 ...ue119004_change_size_by_delete_key_in_dict_eq | 0 ...ythonOrderedDictSubclassTests.test_issue24347 | 0 ...ythonOrderedDictSubclassTests.test_issue24348 | 0 ...nOrderedDictTests.test_delitem_hash_collision | 0 ...nOrderedDictTests.test_highly_nested_subclass | 0 ..._dict-CPythonOrderedDictTests.test_init_calls | 0 ....test_issue119004_change_linked_list_by_clear | 0 ..._issue119004_change_linked_list_by_delete_key | 0 ...ctTests.test_issue119004_change_size_by_clear | 0 ...ts.test_issue119004_change_size_by_delete_key | 0 ...ue119004_change_size_by_delete_key_in_dict_eq | 0 ..._dict-CPythonOrderedDictTests.test_issue24347 | 0 ..._dict-CPythonOrderedDictTests.test_issue24348 | 0 ...DictSubclassTests.test_delitem_hash_collision | 0 ...DictSubclassTests.test_highly_nested_subclass | 0 
...ythonOrderedDictSubclassTests.test_init_calls | 0 ...ubclassTests.test_issue119004_attribute_error | 0 ...ythonOrderedDictSubclassTests.test_issue24347 | 0 ...ythonOrderedDictSubclassTests.test_issue24348 | 0 ...OrderedDictSubclassTests.test_overridden_init | 0 ...OrderedDictSubclassTests.test_override_update | 0 ...nOrderedDictTests.test_delitem_hash_collision | 0 ...nOrderedDictTests.test_highly_nested_subclass | 0 ...ct-PurePythonOrderedDictTests.test_init_calls | 0 ...redDictTests.test_issue119004_attribute_error | 0 ...ct-PurePythonOrderedDictTests.test_issue24347 | 0 ...ct-PurePythonOrderedDictTests.test_issue24348 | 0 ...rePythonOrderedDictTests.test_overridden_init | 0 ...rePythonOrderedDictTests.test_override_update | 0 ...aryOpsMutating_Set_Set.test_and_with_mutation | 0 ...naryOpsMutating_Set_Set.test_eq_with_mutation | 0 ...naryOpsMutating_Set_Set.test_ge_with_mutation | 0 ...naryOpsMutating_Set_Set.test_gt_with_mutation | 0 ...ryOpsMutating_Set_Set.test_iadd_with_mutation | 0 ...aryOpsMutating_Set_Set.test_ior_with_mutation | 0 ...ryOpsMutating_Set_Set.test_isub_with_mutation | 0 ...Mutating_Set_Set.test_iteration_with_mutation | 0 ...ryOpsMutating_Set_Set.test_ixor_with_mutation | 0 ...naryOpsMutating_Set_Set.test_le_with_mutation | 0 ...naryOpsMutating_Set_Set.test_lt_with_mutation | 0 ...naryOpsMutating_Set_Set.test_ne_with_mutation | 0 ...naryOpsMutating_Set_Set.test_or_with_mutation | 0 ...aryOpsMutating_Set_Set.test_sub_with_mutation | 0 ...aryOpsMutating_Set_Set.test_xor_with_mutation | 0 ...sMutating_Set_Subclass.test_and_with_mutation | 0 ...psMutating_Set_Subclass.test_eq_with_mutation | 0 ...psMutating_Set_Subclass.test_ge_with_mutation | 0 ...psMutating_Set_Subclass.test_gt_with_mutation | 0 ...Mutating_Set_Subclass.test_iadd_with_mutation | 0 ...sMutating_Set_Subclass.test_ior_with_mutation | 0 ...Mutating_Set_Subclass.test_isub_with_mutation | 0 ...ing_Set_Subclass.test_iteration_with_mutation | 0 ...Mutating_Set_Subclass.test_ixor_with_mutation | 0 ...psMutating_Set_Subclass.test_le_with_mutation | 0 ...psMutating_Set_Subclass.test_lt_with_mutation | 0 ...psMutating_Set_Subclass.test_ne_with_mutation | 0 ...psMutating_Set_Subclass.test_or_with_mutation | 0 ...sMutating_Set_Subclass.test_sub_with_mutation | 0 ...sMutating_Set_Subclass.test_xor_with_mutation | 0 ...sMutating_Subclass_Set.test_and_with_mutation | 0 ...psMutating_Subclass_Set.test_eq_with_mutation | 0 ...psMutating_Subclass_Set.test_ge_with_mutation | 0 ...psMutating_Subclass_Set.test_gt_with_mutation | 0 ...Mutating_Subclass_Set.test_iadd_with_mutation | 0 ...sMutating_Subclass_Set.test_ior_with_mutation | 0 ...Mutating_Subclass_Set.test_isub_with_mutation | 0 ...ing_Subclass_Set.test_iteration_with_mutation | 0 ...Mutating_Subclass_Set.test_ixor_with_mutation | 0 ...psMutating_Subclass_Set.test_le_with_mutation | 0 ...psMutating_Subclass_Set.test_lt_with_mutation | 0 ...psMutating_Subclass_Set.test_ne_with_mutation | 0 ...psMutating_Subclass_Set.test_or_with_mutation | 0 ...sMutating_Subclass_Set.test_sub_with_mutation | 0 ...sMutating_Subclass_Set.test_xor_with_mutation | 0 ...ting_Subclass_Subclass.test_and_with_mutation | 0 ...ating_Subclass_Subclass.test_eq_with_mutation | 0 ...ating_Subclass_Subclass.test_ge_with_mutation | 0 ...ating_Subclass_Subclass.test_gt_with_mutation | 0 ...ing_Subclass_Subclass.test_iadd_with_mutation | 0 ...ting_Subclass_Subclass.test_ior_with_mutation | 0 ...ing_Subclass_Subclass.test_isub_with_mutation | 0 ...ubclass_Subclass.test_iteration_with_mutation | 0 
...ing_Subclass_Subclass.test_ixor_with_mutation | 0 ...ating_Subclass_Subclass.test_le_with_mutation | 0 ...ating_Subclass_Subclass.test_lt_with_mutation | 0 ...ating_Subclass_Subclass.test_ne_with_mutation | 0 ...ating_Subclass_Subclass.test_or_with_mutation | 0 ...ting_Subclass_Subclass.test_sub_with_mutation | 0 ...ting_Subclass_Subclass.test_xor_with_mutation | 0 ...est_set-TestFrozenSet.test_container_iterator | 0 ...ython313-test_set-TestFrozenSet.test_deepcopy | 0 .../CPython313-test_set-TestFrozenSet.test_gc | 0 ...-TestFrozenSet.test_subclass_with_custom_hash | 0 ...TestFrozenSetSubclass.test_container_iterator | 0 ...-test_set-TestFrozenSetSubclass.test_deepcopy | 0 ...hon313-test_set-TestFrozenSetSubclass.test_gc | 0 ...stFrozenSetSubclass.test_keywords_in_subclass | 0 ...zenSetSubclass.test_subclass_with_custom_hash | 0 ...Set_Dict.test_difference_update_with_mutation | 0 ...tating_Set_Dict.test_difference_with_mutation | 0 ...t_Dict.test_intersection_update_with_mutation | 0 ...ting_Set_Dict.test_intersection_with_mutation | 0 ...tating_Set_Dict.test_isdisjoint_with_mutation | 0 ...Mutating_Set_Dict.test_issubset_with_mutation | 0 ...tating_Set_Dict.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ..._Dict.test_symmetric_difference_with_mutation | 0 ...odsMutating_Set_Dict.test_union_with_mutation | 0 ...dsMutating_Set_Dict.test_update_with_mutation | 0 ...Set_List.test_difference_update_with_mutation | 0 ...tating_Set_List.test_difference_with_mutation | 0 ...t_List.test_intersection_update_with_mutation | 0 ...ting_Set_List.test_intersection_with_mutation | 0 ...tating_Set_List.test_isdisjoint_with_mutation | 0 ...Mutating_Set_List.test_issubset_with_mutation | 0 ...tating_Set_List.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ..._List.test_symmetric_difference_with_mutation | 0 ...odsMutating_Set_List.test_union_with_mutation | 0 ...dsMutating_Set_List.test_update_with_mutation | 0 ..._Set_Set.test_difference_update_with_mutation | 0 ...utating_Set_Set.test_difference_with_mutation | 0 ...et_Set.test_intersection_update_with_mutation | 0 ...ating_Set_Set.test_intersection_with_mutation | 0 ...utating_Set_Set.test_isdisjoint_with_mutation | 0 ...sMutating_Set_Set.test_issubset_with_mutation | 0 ...utating_Set_Set.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...t_Set.test_symmetric_difference_with_mutation | 0 ...hodsMutating_Set_Set.test_union_with_mutation | 0 ...odsMutating_Set_Set.test_update_with_mutation | 0 ...Subclass.test_difference_update_with_mutation | 0 ...ng_Set_Subclass.test_difference_with_mutation | 0 ...bclass.test_intersection_update_with_mutation | 0 ..._Set_Subclass.test_intersection_with_mutation | 0 ...ng_Set_Subclass.test_isdisjoint_with_mutation | 0 ...ting_Set_Subclass.test_issubset_with_mutation | 0 ...ng_Set_Subclass.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...class.test_symmetric_difference_with_mutation | 0 ...utating_Set_Subclass.test_union_with_mutation | 0 ...tating_Set_Subclass.test_update_with_mutation | 0 ...lass_Set.test_difference_update_with_mutation | 0 ...ng_Subclass_Set.test_difference_with_mutation | 0 ...ss_Set.test_intersection_update_with_mutation | 0 ..._Subclass_Set.test_intersection_with_mutation | 0 ...ng_Subclass_Set.test_isdisjoint_with_mutation | 0 ...ting_Subclass_Set.test_issubset_with_mutation | 0 ...ng_Subclass_Set.test_issuperset_with_mutation | 0 
...est_symmetric_difference_update_with_mutation | 0 ...s_Set.test_symmetric_difference_with_mutation | 0 ...utating_Subclass_Set.test_union_with_mutation | 0 ...tating_Subclass_Set.test_update_with_mutation | 0 ...Subclass.test_difference_update_with_mutation | 0 ...bclass_Subclass.test_difference_with_mutation | 0 ...bclass.test_intersection_update_with_mutation | 0 ...lass_Subclass.test_intersection_with_mutation | 0 ...bclass_Subclass.test_isdisjoint_with_mutation | 0 ...Subclass_Subclass.test_issubset_with_mutation | 0 ...bclass_Subclass.test_issuperset_with_mutation | 0 ...est_symmetric_difference_update_with_mutation | 0 ...class.test_symmetric_difference_with_mutation | 0 ...ng_Subclass_Subclass.test_union_with_mutation | 0 ...g_Subclass_Subclass.test_update_with_mutation | 0 ...n313-test_set-TestSet.test_container_iterator | 0 .../CPython313-test_set-TestSet.test_deepcopy | 0 .../CPython313-test_set-TestSet.test_gc | 0 ...CPython313-test_set-TestSet.test_rich_compare | 0 ...st_set-TestSet.test_subclass_with_custom_hash | 0 ...t_set-TestSetSubclass.test_container_iterator | 0 ...hon313-test_set-TestSetSubclass.test_deepcopy | 0 .../CPython313-test_set-TestSetSubclass.test_gc | 0 ...set-TestSetSubclass.test_keywords_in_subclass | 0 ...13-test_set-TestSetSubclass.test_rich_compare | 0 ...estSetSubclass.test_subclass_with_custom_hash | 0 ...-test_set-TestWeirdBugs.test_merge_and_mutate | 0 ...CPython313-test_sort-TestBase.testStressfully | 0 .../CPython313-test_sort-TestBugs.test_bug453523 | 0 ...rateSortUndecorate.test_key_with_mutating_del | 0 ...tOptimizedCompares.test_unsafe_object_compare | 0 ...hon313-test_tuple-TupleTest.test_constructors | 0 ...n313-test_tuple-TupleTest.test_contains_order | 0 ...est_tuple-TupleTest.test_keywords_in_subclass | 0 ...3-test_tuple-TupleTest.test_no_comdat_folding | 0 ...n313-test_tuple-TupleTest.test_track_subtypes | 0 ...CPython313-test_userdict-UserDictTest.test_eq | 0 ...est_userlist-UserListTest.test_contains_order | 0 ...est_with-ExceptionalTestCase.testErrorsInBool | 0 ...-ExceptionalTestCase.testRaisedStopIteration2 | 0 ...with-FailureTestCase.testEnterAttributeError1 | 0 ...with-FailureTestCase.testEnterAttributeError2 | 0 ...t_with-FailureTestCase.testExitAttributeError | 0 .../TestAutograd.test_anomaly_detect_nan | 0 .../TestAutograd.test_autograd_print_tensor | 0 ...eckpointing_without_reentrant_with_context_fn | 0 ...ograd.test_custom_autograd_repeated_grad_grad | 0 .../TestAutograd.test_inplace_not_requires_grad | 0 .../TestAutograd.test_lobpcg | 0 .../TestAutograd.test_mark_non_differentiable | 0 ...estAutograd.test_mark_non_differentiable_none | 0 ...d.test_naughty_autograd_function_stashing_ctx | 0 .../TestAutograd.test_return_leaf_inplace | 0 ...test_const_fold_basic_one_attr_name_collision | 0 ...t_const_fold_basic_one_attr_no_name_collision | 0 .../TestConstFold.test_const_fold_basic_two_attr | 0 ...ld.test_const_fold_basic_two_attr_three_input | 0 ....test_const_fold_has_inlined_call_module_node | 0 .../TestConstFold.test_const_fold_module_attr | 0 ...estConstFold.test_const_fold_submod_hierarchy | 0 ...tConstFold.test_const_fold_unused_placeholder | 0 .../TestConstFold.test_dict_output | 0 .../TestConstFold.test_fold_module | 0 .../TestConstFold.test_three_outputs | 0 .../TestConstFold.test_two_outputs | 0 ...raced.test_cond_merge_graph_preserves_ph_meta | 0 ...ched_branch_output_dynamic_True_backend_eager | 0 ...test_cond_symint_operands_requires_grad_False | 0 ....test_cond_symint_operands_requires_grad_True | 0 
...rolFlowTraced.test_while_loop_autograd_simple | 0 ...nJIT.test_cpp_frontend_module_python_inter_op | 0 ...cpp_frontend_module_python_inter_op_with_cuda | 0 ...not_raised_when_exception_source_is_submodule | 0 ...mericSuiteCoreAPIs.test_user_defined_function | 0 .../TestFlag.test_writeable_any_base | 0 ...estIndexing.test_broken_sequence_not_nd_index | 0 ...etrizations_and_params_single_param_swap_True | 0 .../TestPrivateUse1.test_backend_type_methods | 0 .../TestPythonDispatch.test_maybe_tuple_bug | 0 .../TestPythonDispatch.test_set_data | 0 ...tch.test_wrapper_subclass_extra_dispatch_keys | 0 ...st_functional_call_member_reference_stateless | 0 ...t_functional_call_member_reference_torch_func | 0 ...lies_module_and_param_specific_decorators_cpu | 0 ...nDeviceTypeCPU.test_ops_composition_names_cpu | 0 ..._applies_op_and_param_specific_decorators_cpu | 0 .../TestTorch.test_as_subclass | 0 .../TestTorch.test_storage_cycle_via_slots | 0 .../TestTorch.test_storage_finalizer_dealloc | 0 .../TestTorch.test_storage_slot_dealloc | 0 .../TestTorch.test_tensor_cycle_via_slots | 0 .../TestTorch.test_tensor_finalizer_dealloc | 0 .../TestTorch.test_tensor_slot_dealloc | 0 ...TestTorchFunctionMode.test_custom_device_type | 0 ...orchFunctionMode.test_disable_enable_subclass | 0 ...tTorchFunctionMode.test_disable_subclass_mode | 0 .../TestTorchFunctionMode.test_factory_override | 0 .../TestTorchFunctionOverride.test_pow_rpow | 0 .../TestAutograd.test_naughty_anomaly_access | 0 test/dynamo_skips/TestPythonPytree.test_key_str | 1 + ...chFunctionMode.test_disable_subclass_not_mode | 0 .../TestTorch.test_storage_cycle_via_slots | 0 .../TestTorch.test_storage_finalizer_dealloc | 0 .../TestTorch.test_storage_slot_dealloc | 0 .../TestTorch.test_tensor_cycle_via_slots | 0 .../TestTorch.test_tensor_finalizer_dealloc | 0 .../TestTorch.test_tensor_slot_dealloc | 0 torch/_dynamo/symbolic_convert.py | 10 +--------- 373 files changed, 24 insertions(+), 15 deletions(-) rename test/dynamo_expected_failures/{TestNNParametrization.test_new_spectral_norm_forward_swap_True => AOTFxirTestCase.test_aoti_fx_const} (100%) create mode 100644 test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked create mode 100644 test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error create mode 100644 
test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass create mode 100644 test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push create mode 100644 test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup create mode 100644 
test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr create mode 100644 test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping create mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion create mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types create mode 100644 test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion create mode 100644 test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types create mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 create mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior create mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence create mode 100644 test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding create mode 100644 test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil create mode 
100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress create mode 100644 test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter create mode 100644 test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls create mode 
100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update create mode 100644 
test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init create mode 100644 test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation create mode 100644 
test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash create mode 100644 test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate create mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully create mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 create mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del create mode 100644 test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding create mode 100644 test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes create mode 100644 test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq create mode 100644 test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order create mode 100644 test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool create mode 100644 test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 create mode 
100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 create mode 100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 create mode 100644 test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError create mode 100644 test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan create mode 100644 test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor create mode 100644 test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn create mode 100644 test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad create mode 100644 test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad create mode 100644 test/dynamo_expected_failures/TestAutograd.test_lobpcg create mode 100644 test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable create mode 100644 test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none create mode 100644 test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx create mode 100644 test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy create mode 100644 test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder create mode 100644 test/dynamo_expected_failures/TestConstFold.test_dict_output create mode 100644 test/dynamo_expected_failures/TestConstFold.test_fold_module create mode 100644 test/dynamo_expected_failures/TestConstFold.test_three_outputs create mode 100644 test/dynamo_expected_failures/TestConstFold.test_two_outputs create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True create mode 100644 test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple create mode 100644 test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op create mode 100644 test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda create mode 100644 test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule create mode 100644 test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function create mode 100644 test/dynamo_expected_failures/TestFlag.test_writeable_any_base create mode 100644 
test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index create mode 100644 test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True create mode 100644 test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods create mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug create mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_set_data create mode 100644 test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys create mode 100644 test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless create mode 100644 test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func create mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu create mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu create mode 100644 test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu create mode 100644 test/dynamo_expected_failures/TestTorch.test_as_subclass create mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots create mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc create mode 100644 test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc create mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots create mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc create mode 100644 test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc create mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type create mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass create mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode create mode 100644 test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override create mode 100644 test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow create mode 100644 test/dynamo_skips/TestAutograd.test_naughty_anomaly_access create mode 100644 test/dynamo_skips/TestPythonPytree.test_key_str create mode 100644 test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode create mode 100644 test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots create mode 100644 test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc create mode 100644 test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc create mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots create mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc create mode 100644 test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc
diff --git a/test/dynamo/test_error_messages.py b/test/dynamo/test_error_messages.py
index 847f3a6fd2166..081ceb5065dfa 100644
--- a/test/dynamo/test_error_messages.py
+++ b/test/dynamo/test_error_messages.py
@@ -726,14 +726,14 @@ class Foo:
             Unsupported,
             lambda: torch.compile(fn, backend="eager", fullgraph=True)(),
             """\
-LOAD_BUILD_CLASS bytecode not supported
-  Explanation: Dynamo does not support tracing classes that are defined in the compiled region.
-  Hint: Move the class definition out of the compiled region.
-  Hint: It may be possible to write Dynamo tracing rules for this code. Please report an issue to PyTorch if you encounter this graph break often and it is causing performance issues.
+Attempted to call function marked as skipped
+  Explanation: Dynamo does not know how to trace the builtin `builtins.__build_class__.` This function is either a Python builtin (e.g. _warnings.warn) or a third-party C/C++ Python extension (perhaps created with pybind).
+  Hint: If it is a Python builtin, please file an issue on GitHub so the PyTorch team can add support for it and see the next case for a workaround.
+  Hint: If it is a third-party C/C++ Python extension, please either wrap it into a PyTorch-understood custom operator (see https://pytorch.org/tutorials/advanced/custom_ops_landing_page.html for more details) or, if it is traceable, use `torch.compiler.allow_in_graph`.
-  Developer debug context:
+  Developer debug context: module: builtins, qualname: __build_class__, skip reason:
-  For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0075.html
+  For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0007.html
 from user code:
    File "test_error_messages.py", line N, in fn
diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
index b7fb01be17152..c9a2a0730b08a 100644
--- a/test/dynamo/test_misc.py
+++ b/test/dynamo/test_misc.py
@@ -12684,6 +12684,22 @@ def f(*args, **kwargs):
         self.assertRaises(Unsupported, f, [])
         self.assertRaises(Unsupported, f, "1 + j")
+    def test_compiled_class_graph_break(self):
+        counter = CompileCounter()
+
+        @torch.compile(backend=counter, fullgraph=False)
+        def f(x):
+            x += 1
+
+            class C:
+                pass
+
+            return x.sin()
+
+        x = torch.randn(3)
+        f(x)
+        self.assertEqual(counter.frame_count, 2)
+
 class MiscTestsPyTree(torch._inductor.test_case.TestCase):
     @parametrize_pytree_module
diff --git a/test/dynamo_expected_failures/TestNNParametrization.test_new_spectral_norm_forward_swap_True b/test/dynamo_expected_failures/AOTFxirTestCase.test_aoti_fx_const similarity index 100% rename from test/dynamo_expected_failures/TestNNParametrization.test_new_spectral_norm_forward_swap_True rename to test/dynamo_expected_failures/AOTFxirTestCase.test_aoti_fx_const diff --git a/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked b/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_blocked new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool b/test/dynamo_expected_failures/CPython313-test_bool-BoolTest.test_convert_to_bool new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Mapping new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Sequence_mixins new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set new 
file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_from_iterable new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_Set_interoperability_with_real_sets new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 b/test/dynamo_expected_failures/CPython313-test_collections-TestCollectionABCs.test_issue16373 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass b/test/dynamo_expected_failures/CPython313-test_collections-TestCounter.test_copy_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator b/test/dynamo_expected_failures/CPython313-test_collections-TestOneTrickPonyABCs.test_Generator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing b/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error b/test/dynamo_expected_failures/CPython313-test_contextlib-ClosingTestCase.test_closing_error new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter b/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_contextmanager_except_stopiter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref b/test/dynamo_expected_failures/CPython313-test_contextlib-ContextManagerTestCase.test_nokeepref new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract b/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_exit_is_abstract new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots b/test/dynamo_expected_failures/CPython313-test_contextlib-TestAbstractContextManager.test_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_decorating_method new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_enter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit b/test/dynamo_expected_failures/CPython313-test_contextlib-TestContextDecorator.test_typo_exit new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_dont_reraise_RuntimeError new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_enter_context_errors new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_chaining_reference new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_exit_exception_explicit_none_context new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_instance_bypass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push b/test/dynamo_expected_failures/CPython313-test_contextlib-TestExitStack.test_push new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr b/test/dynamo_expected_failures/CPython313-test_defaultdict-TestDefaultDict.test_recursive_repr new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_bad_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_copy_maintains_tracking new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_contain_use_after_free new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_dict_copy_order new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_equal_operator_modifying_operand new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_errors_in_view_containment_check new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_dict_operand new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_fromkeys_operator_modifying_set_operand new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_getitem new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_init_use_after_free new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_instance_dict_getattr_str_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_invalid_keyword_arguments new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_merge_and_mutate new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_missing new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_mutating_lookup new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_object_set_item_single_instance_non_str_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_oob_indexing_dictiter_iternextitem new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_pop new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_resize2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_reverse_iterator_for_shared_shared_dicts new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setdefault_atomic new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_setitem_atomic_at_resize new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_to_generic_combinedtable new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_splittable_update new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_str_nonstr new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping b/test/dynamo_expected_failures/CPython313-test_dict-DictTest.test_views_mapping new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_floatconversion new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types b/test/dynamo_expected_failures/CPython313-test_float-GeneralFloatCases.test_non_numeric_input_types new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass b/test/dynamo_expected_failures/CPython313-test_float-HexFloatTestCase.test_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_base_indexable new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass 
b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_returns_int_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_index new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_int_subclass_with_int new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_intconversion new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types b/test/dynamo_expected_failures/CPython313-test_int-IntTestCases.test_non_numeric_input_types new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_3720 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_ref_counting_behavior new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_stop_sequence new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase b/test/dynamo_expected_failures/CPython313-test_iter-TestCase.test_unicode_join_endcase new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_constructors new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_contains_order new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_equal_operator_modifying_operand new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_extend new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_no_comdat_folding new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate b/test/dynamo_expected_failures/CPython313-test_list-ListTest.test_repr_mutate new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil b/test/dynamo_expected_failures/CPython313-test_math-MathTests.testCeil new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor b/test/dynamo_expected_failures/CPython313-test_math-MathTests.testFloor new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_issue39871 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_sumprod_stress new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc b/test/dynamo_expected_failures/CPython313-test_math-MathTests.test_trunc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-CCOperatorPickleTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_index new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_inplace new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_length_hint new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_ne new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_not_ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth b/test/dynamo_expected_failures/CPython313-test_operator-COperatorTestCase.test_truth new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-CPyOperatorPickleTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyCOperatorPickleTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_index new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_inplace new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_length_hint new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_matmul new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_methodcaller new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ b/test/dynamo_expected_failures/CPython313-test_operator-PyOperatorTestCase.test_not_ new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter b/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_attrgetter new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller b/test/dynamo_expected_failures/CPython313-test_operator-PyPyOperatorPickleTestCase.test_methodcaller new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonBuiltinDictTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_init_calls new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_clear new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_linked_list_by_delete_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_clear new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue119004_change_size_by_delete_key_in_dict_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24347 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictSubclassTests.test_issue24348 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_init_calls new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_clear new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_linked_list_by_delete_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_clear new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue119004_change_size_by_delete_key_in_dict_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24347 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-CPythonOrderedDictTests.test_issue24348 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision 
b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_init_calls new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue119004_attribute_error new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24347 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_issue24348 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_overridden_init new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictSubclassTests.test_override_update new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_delitem_hash_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_highly_nested_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_init_calls new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue119004_attribute_error new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24347 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_issue24348 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_overridden_init new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update b/test/dynamo_expected_failures/CPython313-test_ordered_dict-PurePythonOrderedDictTests.test_override_update new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_and_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_eq_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ge_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_gt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iadd_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ior_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_isub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_iteration_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ixor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_le_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_lt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_ne_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_or_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_sub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Set.test_xor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_and_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_eq_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ge_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_gt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iadd_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ior_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_isub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_iteration_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ixor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_le_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_lt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_ne_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_or_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_sub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Set_Subclass.test_xor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_and_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_eq_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ge_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_gt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iadd_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ior_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_isub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_iteration_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ixor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_le_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_lt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_ne_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_or_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_sub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Set.test_xor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_and_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_eq_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ge_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_gt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iadd_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ior_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_isub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_iteration_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ixor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_le_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_lt_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_ne_with_mutation new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_or_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_sub_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestBinaryOpsMutating_Subclass_Subclass.test_xor_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_container_iterator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_deepcopy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_gc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSet.test_subclass_with_custom_hash new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_container_iterator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_deepcopy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_gc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestFrozenSetSubclass.test_subclass_with_custom_hash new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Dict.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_List.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Set.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff 
--git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Set_Subclass.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation 
b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_isdisjoint_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Set.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_intersection_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_isdisjoint_with_mutation new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issubset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_issuperset_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_symmetric_difference_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_union_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation b/test/dynamo_expected_failures/CPython313-test_set-TestMethodsMutating_Subclass_Subclass.test_update_with_mutation new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_container_iterator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_deepcopy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_gc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_rich_compare new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestSet.test_subclass_with_custom_hash new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_container_iterator new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_deepcopy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_gc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_rich_compare new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash b/test/dynamo_expected_failures/CPython313-test_set-TestSetSubclass.test_subclass_with_custom_hash new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate b/test/dynamo_expected_failures/CPython313-test_set-TestWeirdBugs.test_merge_and_mutate new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully b/test/dynamo_expected_failures/CPython313-test_sort-TestBase.testStressfully new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 b/test/dynamo_expected_failures/CPython313-test_sort-TestBugs.test_bug453523 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del b/test/dynamo_expected_failures/CPython313-test_sort-TestDecorateSortUndecorate.test_key_with_mutating_del new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare b/test/dynamo_expected_failures/CPython313-test_sort-TestOptimizedCompares.test_unsafe_object_compare new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_constructors new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_contains_order new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_keywords_in_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_no_comdat_folding new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes b/test/dynamo_expected_failures/CPython313-test_tuple-TupleTest.test_track_subtypes new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq 
b/test/dynamo_expected_failures/CPython313-test_userdict-UserDictTest.test_eq new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order b/test/dynamo_expected_failures/CPython313-test_userlist-UserListTest.test_contains_order new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool b/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testErrorsInBool new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 b/test/dynamo_expected_failures/CPython313-test_with-ExceptionalTestCase.testRaisedStopIteration2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError1 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testEnterAttributeError2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError b/test/dynamo_expected_failures/CPython313-test_with-FailureTestCase.testExitAttributeError new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan b/test/dynamo_expected_failures/TestAutograd.test_anomaly_detect_nan new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor b/test/dynamo_expected_failures/TestAutograd.test_autograd_print_tensor new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn b/test/dynamo_expected_failures/TestAutograd.test_checkpointing_without_reentrant_with_context_fn new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad b/test/dynamo_expected_failures/TestAutograd.test_custom_autograd_repeated_grad_grad new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad b/test/dynamo_expected_failures/TestAutograd.test_inplace_not_requires_grad new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_lobpcg b/test/dynamo_expected_failures/TestAutograd.test_lobpcg new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable b/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none b/test/dynamo_expected_failures/TestAutograd.test_mark_non_differentiable_none new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx 
b/test/dynamo_expected_failures/TestAutograd.test_naughty_autograd_function_stashing_ctx new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace b/test/dynamo_expected_failures/TestAutograd.test_return_leaf_inplace new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_name_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_one_attr_no_name_collision new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input b/test/dynamo_expected_failures/TestConstFold.test_const_fold_basic_two_attr_three_input new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node b/test/dynamo_expected_failures/TestConstFold.test_const_fold_has_inlined_call_module_node new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr b/test/dynamo_expected_failures/TestConstFold.test_const_fold_module_attr new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy b/test/dynamo_expected_failures/TestConstFold.test_const_fold_submod_hierarchy new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder b/test/dynamo_expected_failures/TestConstFold.test_const_fold_unused_placeholder new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_dict_output b/test/dynamo_expected_failures/TestConstFold.test_dict_output new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_fold_module b/test/dynamo_expected_failures/TestConstFold.test_fold_module new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_three_outputs b/test/dynamo_expected_failures/TestConstFold.test_three_outputs new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestConstFold.test_two_outputs b/test/dynamo_expected_failures/TestConstFold.test_two_outputs new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_merge_graph_preserves_ph_meta new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_mismatched_branch_output_dynamic_True_backend_eager new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_False new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True b/test/dynamo_expected_failures/TestControlFlowTraced.test_cond_symint_operands_requires_grad_True new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple b/test/dynamo_expected_failures/TestControlFlowTraced.test_while_loop_autograd_simple new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op b/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda b/test/dynamo_expected_failures/TestCppExtensionJIT.test_cpp_frontend_module_python_inter_op_with_cuda new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule b/test/dynamo_expected_failures/TestFX.test_custom_traceback_not_raised_when_exception_source_is_submodule new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function b/test/dynamo_expected_failures/TestFXNumericSuiteCoreAPIs.test_user_defined_function new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestFlag.test_writeable_any_base b/test/dynamo_expected_failures/TestFlag.test_writeable_any_base new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index b/test/dynamo_expected_failures/TestIndexing.test_broken_sequence_not_nd_index new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True b/test/dynamo_expected_failures/TestNNParametrization.test_transfer_parametrizations_and_params_single_param_swap_True new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods b/test/dynamo_expected_failures/TestPrivateUse1.test_backend_type_methods new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug b/test/dynamo_expected_failures/TestPythonDispatch.test_maybe_tuple_bug new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestPythonDispatch.test_set_data b/test/dynamo_expected_failures/TestPythonDispatch.test_set_data new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys b/test/dynamo_expected_failures/TestPythonDispatch.test_wrapper_subclass_extra_dispatch_keys new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless b/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_stateless new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func b/test/dynamo_expected_failures/TestStatelessFunctionalAPI.test_functional_call_member_reference_torch_func new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_modules_decorator_applies_module_and_param_specific_decorators_cpu new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_composition_names_cpu new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu b/test/dynamo_expected_failures/TestTestParametrizationDeviceTypeCPU.test_ops_decorator_applies_op_and_param_specific_decorators_cpu new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_as_subclass b/test/dynamo_expected_failures/TestTorch.test_as_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots b/test/dynamo_expected_failures/TestTorch.test_storage_cycle_via_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc b/test/dynamo_expected_failures/TestTorch.test_storage_finalizer_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc b/test/dynamo_expected_failures/TestTorch.test_storage_slot_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots b/test/dynamo_expected_failures/TestTorch.test_tensor_cycle_via_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc b/test/dynamo_expected_failures/TestTorch.test_tensor_finalizer_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc b/test/dynamo_expected_failures/TestTorch.test_tensor_slot_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type b/test/dynamo_expected_failures/TestTorchFunctionMode.test_custom_device_type new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass b/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_enable_subclass new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode 
b/test/dynamo_expected_failures/TestTorchFunctionMode.test_disable_subclass_mode new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override b/test/dynamo_expected_failures/TestTorchFunctionMode.test_factory_override new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow b/test/dynamo_expected_failures/TestTorchFunctionOverride.test_pow_rpow new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_skips/TestAutograd.test_naughty_anomaly_access b/test/dynamo_skips/TestAutograd.test_naughty_anomaly_access new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/dynamo_skips/TestPythonPytree.test_key_str b/test/dynamo_skips/TestPythonPytree.test_key_str new file mode 100644 index 0000000000000..a8d6b4d65e03c --- /dev/null +++ b/test/dynamo_skips/TestPythonPytree.test_key_str @@ -0,0 +1 @@ +Passes under python 3.10, fails under 3.13 diff --git a/test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode b/test/dynamo_skips/TestTorchFunctionMode.test_disable_subclass_not_mode new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots b/test/inductor_expected_failures/TestTorch.test_storage_cycle_via_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc b/test/inductor_expected_failures/TestTorch.test_storage_finalizer_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc b/test/inductor_expected_failures/TestTorch.test_storage_slot_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots b/test/inductor_expected_failures/TestTorch.test_tensor_cycle_via_slots new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc b/test/inductor_expected_failures/TestTorch.test_tensor_finalizer_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc b/test/inductor_expected_failures/TestTorch.test_tensor_slot_dealloc new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py index 4dd1321a5057d..beebea05a0e3e 100644 --- a/torch/_dynamo/symbolic_convert.py +++ b/torch/_dynamo/symbolic_convert.py @@ -3290,15 +3290,7 @@ def LOAD_ASSERTION_ERROR(self, inst: Instruction) -> None: self.push(self.load_builtin_from_argval("AssertionError")) def LOAD_BUILD_CLASS(self, inst: Instruction) -> None: - unimplemented_v2( - gb_type="LOAD_BUILD_CLASS bytecode not supported", - context="", - explanation="Dynamo does not support tracing classes that are defined in the compiled region.", - hints=[ - "Move the class definition out of the compiled region.", - *graph_break_hints.SUPPORTABLE, - ], - ) + self.push(self.load_builtin_from_argval("__build_class__")) UNARY_POSITIVE = stack_op(operator.pos) UNARY_NEGATIVE = stack_op(operator.neg) From 96ef26f71aeb386cd1ac591cb50ba98a9c417f18 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 10 Sep 2025 04:42:28 +0000 Subject: [PATCH 028/693] Revert "[ROCm] Integrate 
AITER Fav3 fwd kernels (#160105)" This reverts commit d2393c2d7da03a1523a12e6f80edb6bd7b464ec5. Reverted https://github.com/pytorch/pytorch/pull/160105 on behalf of https://github.com/huydhn due to Sorry for reverting your change but it is failing internal ROCm build ([comment](https://github.com/pytorch/pytorch/pull/160105#issuecomment-3273297183)) --- .../hip/flash_attn/ck/fav_v3/CMakeLists.txt | 31 ++----------------- .../hip/flash_attn/ck/mha_fwd_ck.hip | 12 ++----- .../hip/flash_attn/ck/mha_varlen_fwd_ck.hip | 2 +- third_party/aiter | 2 +- 4 files changed, 6 insertions(+), 41 deletions(-) diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/ck/fav_v3/CMakeLists.txt b/aten/src/ATen/native/transformers/hip/flash_attn/ck/fav_v3/CMakeLists.txt index 19d2930f31777..cccf026690dc0 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/ck/fav_v3/CMakeLists.txt +++ b/aten/src/ATen/native/transformers/hip/flash_attn/ck/fav_v3/CMakeLists.txt @@ -1,22 +1,13 @@ include(CMakePrintHelpers) # Generate AITER/CK Asm code -execute_process( - COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/py_itfs_cu/fmha_v3_fwd_kernel_generate.py --output_dir ${CMAKE_CURRENT_LIST_DIR} - RESULT_VARIABLE ret -) - -if(ret AND NOT ret EQUAL 0) - message( FATAL_ERROR "Failed to generate FAv3 fwd CK Kernels") -endif() - execute_process( COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/py_itfs_cu/fmha_v3_bwd_kernel_generate.py --receipt 1 --output_dir ${CMAKE_CURRENT_LIST_DIR} RESULT_VARIABLE ret ) if(ret AND NOT ret EQUAL 0) - message( FATAL_ERROR "Failed to generate FAv3 bwd CK Kernels") + message( FATAL_ERROR "Failed to generate FAv3 CK Kernels") endif() execute_process( @@ -24,24 +15,6 @@ execute_process( RESULT_VARIABLE ret ) -if(ret AND NOT ret EQUAL 0) - message( FATAL_ERROR "Failed to generate FAv3 bwd api") -endif() - -execute_process( - COMMAND python3 ${CMAKE_SOURCE_DIR}/third_party/aiter/csrc/cpp_itfs/mha_fwd_generate.py --receipt 6 --output_dir ${CMAKE_CURRENT_LIST_DIR} - RESULT_VARIABLE ret -) - -if(ret AND NOT ret EQUAL 0) - message( FATAL_ERROR "Failed to generate FAv3 fwd api") -endif() # Change file extensions to .hip -execute_process(COMMAND bash -c "for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done" - RESULT_VARIABLE ret -) - -if(ret AND NOT ret EQUAL 0) - message( FATAL_ERROR "Failed to modify aiter file extensions") -endif() +execute_process(COMMAND bash -c "for file in ${CMAKE_CURRENT_LIST_DIR}/*.cpp; do mv -- \"$file\" \"\${file%.cpp}.hip\"; done") diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_fwd_ck.hip b/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_fwd_ck.hip index 492e0e4f3498b..05f97414acdd8 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_fwd_ck.hip +++ b/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_fwd_ck.hip @@ -3,7 +3,6 @@ ******************************************************************************/ #include -#include #include #include @@ -142,7 +141,7 @@ fmha_fwd_args get_ck_fmha_fwd_args(bool has_lse, mask.left, mask.right, static_cast(mask.type), - 0, // min_seqlen_q + -1, // min_seqlen_q p_dropout, has_dropout_randval, drop_seed_offset}; @@ -351,14 +350,7 @@ mha_fwd_ck(const at::Tensor &q, // batch_size x seqlen_q x softmax_scale, p_dropout, drop_seed_offset); - float t = aiter::mha_fwd(args, // mha_fwd_args args - stream_config, // stream_config - q_dtype_str, // q_dtype_str - false, // is_group_mode - mask.type, // mask_type - 
attn_bias_.has_value() ? bias_enum::elementwise_bias : bias_enum::no_bias, - has_lse, // has_lse - true); // use_ext_asm + float t = fmha_fwd(traits, args, stream_config); TORCH_CHECK(t >= 0, "invalid argument for fmha_fwd"); } else { diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_varlen_fwd_ck.hip b/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_varlen_fwd_ck.hip index d4ffc2ec424c5..ee6261df8a91a 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_varlen_fwd_ck.hip +++ b/aten/src/ATen/native/transformers/hip/flash_attn/ck/mha_varlen_fwd_ck.hip @@ -349,7 +349,7 @@ mha_varlen_fwd_ck(const at::Tensor &q, // total_q x num_heads p_dropout, drop_seed_offset); float t = fmha_fwd(traits, args, stream_config); - TORCH_CHECK(t >= 0, "invalid argument for fmha_varlen_fwd"); + TORCH_CHECK(t >= 0, "invalid argument for fmha_fwd"); } else { // If seqlen_k == 0, then we have an empty tensor. We need to set the output to 0. diff --git a/third_party/aiter b/third_party/aiter index 28918c0e68d28..01aae101b9e5e 160000 --- a/third_party/aiter +++ b/third_party/aiter @@ -1 +1 @@ -Subproject commit 28918c0e68d28e2c217e0f05344d178877ba611e +Subproject commit 01aae101b9e5e94d6c16a9514c9fb8df99c93150 From 1c16c18a534d320d101ebb10c88bdf57cf84b3b1 Mon Sep 17 00:00:00 2001 From: dolpm <34420038+dolpm@users.noreply.github.com> Date: Wed, 10 Sep 2025 04:52:57 +0000 Subject: [PATCH 029/693] [nativert][triton] improve hardware registration (#162499) Summary: att Test Plan: ci Rollback Plan: Differential Revision: D82031814 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162499 Approved by: https://github.com/angelayi --- caffe2/CMakeLists.txt | 5 ++ test/cpp/nativert/CMakeLists.txt | 9 ++- ...est_triton_kernel_manager_registration.cpp | 17 +++++- .../triton/CpuTritonKernelManager.cpp | 57 ++++++++++++++++++- .../executor/triton/CpuTritonKernelManager.h | 51 ----------------- .../triton/CudaTritonKernelManager.cpp | 36 ++++++++---- .../executor/triton/TritonKernelManager.h | 12 +++- torch/nativert/kernels/TritonKernel.cpp | 43 ++++++++------ 8 files changed, 142 insertions(+), 88 deletions(-) delete mode 100644 torch/nativert/executor/triton/CpuTritonKernelManager.h diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 4623fec08fe32..99d4b2cd5aa93 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -552,6 +552,11 @@ if(USE_CUDA OR USE_ROCM) append_filelist("libtorch_cuda_core_sources" Caffe2_GPU_HIP_JIT_FUSERS_SRCS) endif() +if(USE_CUDA) + # eventually do rocm + append_filelist("libtorch_nativert_cuda_sources" Caffe2_GPU_SRCS) +endif() + if(USE_CUDA) list(APPEND Caffe2_GPU_CU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS}) add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS}) diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt index 1b4752ed9089f..91605c0933d2c 100644 --- a/test/cpp/nativert/CMakeLists.txt +++ b/test/cpp/nativert/CMakeLists.txt @@ -40,21 +40,24 @@ set(NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/graph/passes/pass_manager/GraphPasses.cpp ${TORCH_ROOT}/torch/nativert/graph/passes/pass_manager/PassManager.cpp ${TORCH_ROOT}/torch/nativert/kernels/KernelHandlerRegistry.cpp - ${TORCH_ROOT}/torch/nativert/kernels/TritonKernel.cpp ${TORCH_ROOT}/torch/nativert/executor/triton/CpuTritonKernelManager.cpp + ${TORCH_ROOT}/torch/nativert/kernels/TritonKernel.cpp ${TORCH_ROOT}/torch/nativert/executor/DelegateExecutor.cpp ) if(USE_CUDA) list(APPEND NATIVERT_TEST_SRCS 
${TORCH_ROOT}/torch/nativert/executor/triton/CudaTritonKernelManager.cpp) -endif(MSVC) - +endif() add_executable(test_nativert ${TORCH_ROOT}/test/cpp/common/main.cpp ${NATIVERT_TEST_SRCS} ) +if(MSVC) + target_compile_definitions(test_nativert PRIVATE NATIVERT_MSVC_TEST) +endif() + # TODO temporary until we can delete the old gtest polyfills. target_compile_definitions(test_nativert PRIVATE USE_GTEST) diff --git a/test/cpp/nativert/test_triton_kernel_manager_registration.cpp b/test/cpp/nativert/test_triton_kernel_manager_registration.cpp index ca864158e3122..8cedb84abf218 100644 --- a/test/cpp/nativert/test_triton_kernel_manager_registration.cpp +++ b/test/cpp/nativert/test_triton_kernel_manager_registration.cpp @@ -6,9 +6,20 @@ using namespace ::testing; using namespace torch::nativert; TEST(TritonKernelManagerRegistrationTests, TestRegister) { -#ifndef USE_CUDA - EXPECT_TRUE(create_cuda_triton_kernel_manager == nullptr); + EXPECT_TRUE(TritonKernelManagerRegistry()->Has(at::kCPU)); + +#ifdef USE_CUDA +#ifdef USE_ROCM + EXPECT_TRUE(TritonKernelManagerRegistry()->Has(at::kHIP)); + EXPECT_FALSE(TritonKernelManagerRegistry()->Has(at::kCUDA)); + +#else + EXPECT_TRUE(TritonKernelManagerRegistry()->Has(at::kCUDA)); + EXPECT_FALSE(TritonKernelManagerRegistry()->Has(at::kHIP)); + +#endif // USE_ROCM #else - EXPECT_FALSE(create_cuda_triton_kernel_manager == nullptr); + EXPECT_FALSE(TritonKernelManagerRegistry()->Has(at::kCUDA)); + EXPECT_FALSE(TritonKernelManagerRegistry()->Has(at::kHIP)); #endif // USE_CUDA } diff --git a/torch/nativert/executor/triton/CpuTritonKernelManager.cpp b/torch/nativert/executor/triton/CpuTritonKernelManager.cpp index 1f8d394ecf391..c212539e49304 100644 --- a/torch/nativert/executor/triton/CpuTritonKernelManager.cpp +++ b/torch/nativert/executor/triton/CpuTritonKernelManager.cpp @@ -1,5 +1,6 @@ -#include +#include +#include #include #ifndef _WIN32 @@ -35,6 +36,43 @@ char* _dlerror() { } // namespace +typedef void* kernel_ptr_t; +typedef void ( + *launcher_ptr_t)(uint32_t, uint32_t, uint32_t, void**, kernel_ptr_t); + +struct DlcloseDeleter { + void operator()(void* p) const { + if (p) { +#if defined(_WIN32) + TORCH_CHECK(false, "Windows is not supported"); +#else + dlclose(p); +#endif + } + } +}; + +class CpuTritonKernelManager final : public TritonKernelManager { + public: + CpuTritonKernelManager( + std::string kernel_name, + std::string kernel_bin_path, + std::string kernel_launcher_bin_path); + ~CpuTritonKernelManager() final = default; + void launch(const LaunchParams& launch_params, void** args) final; + + private: + void load(); + + kernel_ptr_t kernel_fn_{nullptr}; + launcher_ptr_t launcher_fn_{nullptr}; + + std::unique_ptr kernel_handle_{nullptr}; + std::unique_ptr launcher_handle_{nullptr}; + + std::string kernel_launcher_bin_path_; +}; + CpuTritonKernelManager::CpuTritonKernelManager( std::string kernel_name, std::string kernel_bin_path, @@ -88,4 +126,21 @@ void CpuTritonKernelManager::launch( kernel_fn_); } +namespace { +std::unique_ptr create_cpu_triton_kernel_manager( + std::string kernel_name, + std::string kernel_bin_path, + std::string kernel_launcher_bin_path) { + return std::make_unique( + std::move(kernel_name), + std::move(kernel_bin_path), + std::move(kernel_launcher_bin_path)); +} +} // namespace + +C10_REGISTER_TYPED_CREATOR( + TritonKernelManagerRegistry, + at::kCPU, + create_cpu_triton_kernel_manager) + } // namespace torch::nativert diff --git a/torch/nativert/executor/triton/CpuTritonKernelManager.h 
b/torch/nativert/executor/triton/CpuTritonKernelManager.h deleted file mode 100644 index 45b3327c878e4..0000000000000 --- a/torch/nativert/executor/triton/CpuTritonKernelManager.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include - -#include -#include - -#ifndef _WIN32 -#include -#endif - -typedef void* kernel_ptr_t; -typedef void ( - *launcher_ptr_t)(uint32_t, uint32_t, uint32_t, void**, kernel_ptr_t); - -namespace torch::nativert { - -struct DlcloseDeleter { - void operator()(void* p) const { - if (p) { -#if defined(_WIN32) - TORCH_CHECK(false, "Windows is not supported"); -#else - dlclose(p); -#endif - } - } -}; - -class CpuTritonKernelManager final : public TritonKernelManager { - public: - CpuTritonKernelManager( - std::string kernel_name, - std::string kernel_bin_path, - std::string kernel_launcher_bin_path); - ~CpuTritonKernelManager() final = default; - void launch(const LaunchParams& launch_params, void** args) final; - - private: - void load(); - - kernel_ptr_t kernel_fn_{nullptr}; - launcher_ptr_t launcher_fn_{nullptr}; - - std::unique_ptr kernel_handle_{nullptr}; - std::unique_ptr launcher_handle_{nullptr}; - - std::string kernel_launcher_bin_path_; -}; - -} // namespace torch::nativert diff --git a/torch/nativert/executor/triton/CudaTritonKernelManager.cpp b/torch/nativert/executor/triton/CudaTritonKernelManager.cpp index 47f72ce0c5e37..d18efcc178f46 100644 --- a/torch/nativert/executor/triton/CudaTritonKernelManager.cpp +++ b/torch/nativert/executor/triton/CudaTritonKernelManager.cpp @@ -29,7 +29,7 @@ namespace torch::nativert { class CudaKernelInputs final : public KernelInputs { public: CudaKernelInputs(size_t num_args, size_t num_attrs) - : KernelInputs(num_args, num_attrs), arg_ptrs_(num_args) {}; + : KernelInputs(num_args, num_attrs), arg_ptrs_(num_args) {} ~CudaKernelInputs() final = default; void add_arg(void* arg) override { @@ -73,7 +73,7 @@ CudaTritonKernelManager::CudaTritonKernelManager( TORCH_CHECK( at::globalContext().hasCUDA() || at::globalContext().hasHIP(), "cuda or hip required"); -}; +} CudaTritonKernelManager::~CudaTritonKernelManager() { const auto& nvrtc = get_nvrtc(); @@ -137,19 +137,31 @@ void CudaTritonKernelManager::launch( nullptr)); } -static std::unique_ptr _create_cuda_triton_kernel_manager( +namespace { +std::unique_ptr create_cuda_triton_kernel_manager( std::string kernel_name, - std::string kernel_bin_path) { + std::string kernel_bin_path, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + [[maybe_unused]] std::string kernel_launcher_bin_path) { return std::make_unique( std::move(kernel_name), std::move(kernel_bin_path)); } +} // namespace -} // namespace torch::nativert +#ifdef USE_ROCM -namespace { -static bool _initialized_cuda_triton_kernel_manager = []() { - torch::nativert::create_cuda_triton_kernel_manager = - &torch::nativert::_create_cuda_triton_kernel_manager; - return true; -}(); -} // namespace +C10_REGISTER_TYPED_CREATOR( + TritonKernelManagerRegistry, + at::kHIP, + create_cuda_triton_kernel_manager) + +#else + +C10_REGISTER_TYPED_CREATOR( + TritonKernelManagerRegistry, + at::kCUDA, + create_cuda_triton_kernel_manager) + +#endif // USE_ROCM + +} // namespace torch::nativert diff --git a/torch/nativert/executor/triton/TritonKernelManager.h b/torch/nativert/executor/triton/TritonKernelManager.h index ffa8e2573bc02..976fb3921f0ab 100644 --- a/torch/nativert/executor/triton/TritonKernelManager.h +++ b/torch/nativert/executor/triton/TritonKernelManager.h @@ -2,7 +2,9 @@ #include +#include #include +#include namespace 
torch::nativert { @@ -69,7 +71,13 @@ class TritonKernelManager { std::string kernel_name_, kernel_bin_path_; }; -inline std::unique_ptr ( - *create_cuda_triton_kernel_manager)(std::string, std::string) = nullptr; +C10_DECLARE_TYPED_REGISTRY( + TritonKernelManagerRegistry, + c10::DeviceType, + TritonKernelManager, + std::unique_ptr, + std::string /* kernel_name */, + std::string /* kernel_bin_path */, + std::string /* kernel_launcher_bin_path */); } // namespace torch::nativert diff --git a/torch/nativert/kernels/TritonKernel.cpp b/torch/nativert/kernels/TritonKernel.cpp index 84fbf09a37f43..3843036aead97 100644 --- a/torch/nativert/kernels/TritonKernel.cpp +++ b/torch/nativert/kernels/TritonKernel.cpp @@ -16,10 +16,20 @@ #include #endif -#include - namespace torch::nativert { +// in this case, we want to use the symbol from torch_cpu.dll +#ifndef NATIVERT_MSVC_TEST +C10_DEFINE_TYPED_REGISTRY( + TritonKernelManagerRegistry, + c10::DeviceType, + TritonKernelManager, + std::unique_ptr, + std::string /* kernel_name */, + std::string /* kernel_bin_path */, + std::string /* kernel_launcher_bin_path */) +#endif + TritonKernel::TritonKernel( const Node* node, caffe2::serialize::PyTorchStreamReader* reader) @@ -74,27 +84,28 @@ TritonKernel::TritonKernel( auto tmp_dir = extractToTemporaryFolder(*reader, kernel_prefix) + "/"; if (reader->hasRecord(kernel_prefix + "/" + kernel_name + ".cubin")) { + loader_ = TritonKernelManagerRegistry()->Create( + at::kCUDA, kernel_name, tmp_dir + kernel_name + ".cubin", ""); TORCH_CHECK( - create_cuda_triton_kernel_manager != nullptr, + loader_ != nullptr, "couldn't find cuda loader -- is this a gpu build?"); - loader_ = create_cuda_triton_kernel_manager( - kernel_name, tmp_dir + kernel_name + ".cubin"); - } - - if (reader->hasRecord(kernel_prefix + "/" + kernel_name + ".hsaco")) { + } else if (reader->hasRecord(kernel_prefix + "/" + kernel_name + ".hsaco")) { + loader_ = TritonKernelManagerRegistry()->Create( + at::kHIP, kernel_name, tmp_dir + kernel_name + ".hsaco", ""); TORCH_CHECK( - create_cuda_triton_kernel_manager != nullptr, + loader_ != nullptr, "couldn't find cuda loader -- is this a gpu build?"); - loader_ = create_cuda_triton_kernel_manager( - kernel_name, tmp_dir + kernel_name + ".hsaco"); - } - - if (loader_ == nullptr) { - loader_ = std::unique_ptr(new CpuTritonKernelManager( + } else { + loader_ = TritonKernelManagerRegistry()->Create( + at::kCPU, kernel_name, tmp_dir + kernel_name + ".so", - tmp_dir + kernel_name + ".launcher.so")); + tmp_dir + kernel_name + ".launcher.so"); } + + TORCH_CHECK( + loader_ != nullptr, + "couldn't find triton kernel loader -- are you trying to run gpu kernels on a cpu build?"); } TritonKernel::~TritonKernel() = default; From b9a7d0e13b4a34be83c778734dbad437c7c5117b Mon Sep 17 00:00:00 2001 From: Benjamin Girard <32196439+Eldalie@users.noreply.github.com> Date: Wed, 10 Sep 2025 06:02:11 +0000 Subject: [PATCH 030/693] Fix DCE eliminating in-place operations by improving Node.is_impure() (#162267) Change is_impure to check in-place operations on Node to prevent eliminate_dead_code from eliminating in-place operations. 
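A minimal repro sketch of the failure mode this change guards against (illustrative only, not taken from this PR; the module and names below are made up): without treating in-place calls as impure, `Graph.eliminate_dead_code()` drops the mutating node because its result has no users, silently changing the traced program's semantics.

```python
import torch
import torch.fx


class M(torch.nn.Module):
    def forward(self, x):
        x.add_(2)  # in-place mutation; its fx node has no users
        return x * 3


gm = torch.fx.symbolic_trace(M())
gm.graph.eliminate_dead_code()  # with is_impure() flagging call_method names ending in "_", the add_ node is kept
gm.recompile()
print(gm.code)  # the generated code should still contain the add_ call
```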
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162267 Approved by: https://github.com/ezyang --- test/fx/test_dce_pass.py | 88 +++++++++++++++++++++++++++++++++++++++- torch/fx/node.py | 28 +++++++++++++ 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/test/fx/test_dce_pass.py b/test/fx/test_dce_pass.py index 7fd3a6dbb0041..5c0230e14c432 100644 --- a/test/fx/test_dce_pass.py +++ b/test/fx/test_dce_pass.py @@ -1,5 +1,6 @@ # Owner(s): ["module: fx"] import copy +import inspect import unittest from typing import Optional @@ -38,12 +39,39 @@ def _get_num_placeholders(self, m: torch.fx.GraphModule) -> int: count += 1 return count + @torch.compiler.disable + def _trace_with_dynamo(self, m: torch.nn.Module) -> torch.fx.GraphModule: + """Dynamo will keep in-place operations, whereas torch.fx.Tracer will remove them.""" + graph_module: torch.fx.GraphModule | None = None + + def _backend(gm: torch.fx.GraphModule, _): + nonlocal graph_module + graph_module = gm + return gm + + inputs = [ + torch.tensor([1.5]) + for _ in range(len(inspect.signature(m.forward).parameters)) + ] + torch.compile( + m, + backend=_backend, + fullgraph=True, + )(*inputs) + assert graph_module is not None + + # TorchDynamo returns a graph with flattened output; unflatten here for the test + graph_module.graph.output_node().args = graph_module.graph.output_node().args[0] + graph_module.recompile() + return graph_module + def _run_dce_and_test( self, m: torch.nn.Module, expect_dce_changes: bool, modules_to_be_leafs: Optional[set[type]] = None, custom: bool = False, + use_dynamo_for_tracing: bool = False, ): class TestTracer(torch.fx.Tracer): def is_leaf_module(self, m, qualname): @@ -51,7 +79,12 @@ def is_leaf_module(self, m, qualname): return True return super().trace(m, qualname) - traced: torch.fx.GraphModule = torch.fx.GraphModule(m, TestTracer().trace(m)) + if use_dynamo_for_tracing: + traced = self._trace_with_dynamo(m) + else: + traced: torch.fx.GraphModule = torch.fx.GraphModule( + m, TestTracer().trace(m) + ) print(str(traced.graph)) # Verify there are nodes without users (if expected). @@ -80,7 +113,7 @@ def is_leaf_module(self, m, qualname): traced.recompile() # Make sure we run and get the same results before/after DCE. - inputs = [torch.tensor([1.5])] * new_num_phs + inputs = [torch.tensor([1.5]) for _ in range(new_num_phs)] inputs_copy = copy.deepcopy(inputs) self.assertTrue(torch.equal(m(*inputs), traced(*inputs_copy))) @@ -182,6 +215,57 @@ def forward(self, a: torch.Tensor) -> torch.Tensor: TestModule(), expect_dce_changes=False, modules_to_be_leafs={ReLUImpure} ) + def test_keep_inplace_with_side_effects(self): + """ + Test that DCE doesn't remove an inplace operation. + """ + + class TestModule(torch.nn.Module): + def forward(self, x: torch.Tensor) -> torch.Tensor: + x.add_(2) + y = 2 * x + x.add_(y) + return y + + self._run_dce_and_test(TestModule(), expect_dce_changes=False) + + def test_keep_inplace_python_operator_with_side_effects(self): + """ + Test that DCE doesn't remove an inplace operation. 
+ """ + + class TestModule(torch.nn.Module): + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + x += y + x //= y + x %= y + x *= y + x -= y + x /= y + x @= y + + x = x.reshape_as(y) + concat_a = [x] + concat_b = [y] + concat_a += concat_b + + a = x.to(dtype=torch.long) + b = y.to(dtype=torch.long) + + a //= b + a <<= b + a %= b + a |= b + a **= b + a >>= b + a ^= b + + return x + y + concat_a[0] + a + b + + self._run_dce_and_test( + TestModule(), expect_dce_changes=False, use_dynamo_for_tracing=True + ) + def test_keep_torch_assert(self): """ Test that DCE doesn't remove torch._assert since it has side effects. diff --git a/torch/fx/node.py b/torch/fx/node.py index dbd6ed93ef26c..19eb9a0e46aa4 100644 --- a/torch/fx/node.py +++ b/torch/fx/node.py @@ -84,6 +84,23 @@ torch.amp._exit_autocast, ] +_side_effect_inplace: set[Callable[..., Any]] = { + operator.iadd, + operator.iand, + operator.iconcat, + operator.ifloordiv, + operator.ilshift, + operator.imod, + operator.imul, + operator.imatmul, + operator.ior, + operator.ipow, + operator.irshift, + operator.isub, + operator.itruediv, + operator.ixor, +} + # TODO: Either refactor this into 2 functions 1 dce for functional graphs and 1 dce for all graphs, # or add logic to correctly mark all inplace ops as side effectful. _side_effectful_functions: set[Callable[..., Any]] = { @@ -99,6 +116,7 @@ _ops.profiler._record_function_exit, _ops.inductor.accumulate_grad_.default, operator.setitem, + *_side_effect_inplace, *_side_effectful_need_to_be_preserved_pre_dispatch, } @@ -813,6 +831,16 @@ def is_impure(self, impure_random: bool = True) -> bool: ) return getattr(target_mod, "_is_impure", False) + if self.op == "call_method": + target_name = ( + self.target + if isinstance(self.target, str) + else torch.typename(self.target) + ) + # Check for functions with names ending in an underscore (e.g., 'add_') that are inplace in torch + if target_name.endswith("_"): + return True + return False @compatibility(is_backward_compatible=False) From 833997a6fdfc2a52a535744689dceb666c944d2e Mon Sep 17 00:00:00 2001 From: Xingyuan Li Date: Wed, 10 Sep 2025 06:47:57 +0000 Subject: [PATCH 031/693] [Inductor][UT] Fix flex attention related inductor cases (#162450) ## Motivation Fixes #162435, Fixes #162436 UT failures: * https://github.com/pytorch/pytorch/actions/runs/17523991468/job/49772651636 * https://github.com/pytorch/pytorch/actions/runs/17523991468/job/49772651637 To fix flex attention related cases. 
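For context, a small sketch of the device-normalization idiom the patched expecttests rely on (illustrative only; `device` below is an assumed stand-in for the test fixture): the golden graph strings carry a `GPU_TYPE` placeholder that is substituted with the actual accelerator type at runtime, so a single expected string serves both CUDA and XPU runs.

```python
import torch

# assumed stand-in for the test's `device` argument
device = "cuda" if torch.cuda.is_available() else "cpu"

golden = "device(type='GPU_TYPE', index=0)"
normalized = golden.replace("GPU_TYPE", torch.device(device).type)
print(normalized)  # e.g. device(type='cuda', index=0) on a CUDA build
```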
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162450 Approved by: https://github.com/drisspg --- test/inductor/test_flex_attention.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/inductor/test_flex_attention.py b/test/inductor/test_flex_attention.py index d2a5019d47966..f90d7aa77229f 100644 --- a/test/inductor/test_flex_attention.py +++ b/test/inductor/test_flex_attention.py @@ -1967,6 +1967,7 @@ def score_mod_scale(qk, b, h, q, kv): @supported_platform @dtypes(*device_configs["cpu"].dtypes_fast) @dtypesIfCUDA(*device_configs["cuda"].dtypes_fast) + @dtypesIfXPU(*device_configs["xpu"].dtypes_fast) @common_utils.parametrize( "score_mod", test_score_mods, name_fn=lambda score_mod: score_mod.__name__ ) @@ -2060,6 +2061,7 @@ def test_return_max(self, device, dtype, score_mod): @supported_platform @dtypes(*device_configs["cpu"].dtypes_fast) @dtypesIfCUDA(*device_configs["cuda"].dtypes_fast) + @dtypesIfXPU(*device_configs["xpu"].dtypes_fast) @common_utils.parametrize( "score_mod", test_score_mods, name_fn=lambda score_mod: score_mod.__name__ ) @@ -2142,6 +2144,7 @@ def test_return_aux(self, device, dtype, score_mod): @supported_platform @dtypes(*device_configs["cpu"].dtypes_fast) @dtypesIfCUDA(*device_configs["cuda"].dtypes_fast) + @dtypesIfXPU(*device_configs["xpu"].dtypes_fast) @skip_on_cpu def test_return_aux_deprecation_warnings(self, device, dtype): """Test that deprecation warnings are issued for legacy parameters""" @@ -2195,6 +2198,7 @@ def test_return_aux_deprecation_warnings(self, device, dtype): @supported_platform @dtypes(*device_configs["cpu"].dtypes_fast) @dtypesIfCUDA(*device_configs["cuda"].dtypes_fast) + @dtypesIfXPU(*device_configs["xpu"].dtypes_fast) @skip_on_cpu def test_dynamic_divisibility_guards(self, device, dtype): """Test guards for divisible/non-divisible shape transitions""" @@ -4141,7 +4145,7 @@ def debug_compile_fx_inner(graph, example_inputs, *args, **kwargs): """\ class GraphModule(torch.nn.Module): def forward(self, primals_1: "f64[2, 2, 128, 4]", primals_2: "f64[2, 2, 128, 4]", primals_3: "f64[2, 2, 128, 4]", full: "i32[1, 1, 1]", full_default: "i32[1, 1, 1, 1]", convert_element_type: "i32[1, 1, 1]", convert_element_type_1: "i32[1, 1, 1, 1]", getitem_2: "f64[2, 2, 128, 4]", getitem_3: "f32[2, 2, 128]", tangents_1: "f64[2, 2, 128, 4]"): - full_default_4: "f32[2, 2, 128]" = torch.ops.aten.full.default([2, 2, 128], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + full_default_4: "f32[2, 2, 128]" = torch.ops.aten.full.default([2, 2, 128], 0, dtype = torch.float32, layout = torch.strided, device = device(type='GPU_TYPE', index=0), pin_memory = False) fw_graph0 = self.fw_graph0 joint_graph0 = self.joint_graph0 mask_graph0 = self.mask_graph0 @@ -4165,7 +4169,7 @@ def forward(self, arg0_1: "f64[]", arg1_1: "i32[]", arg2_1: "i32[]", arg3_1: "i3 class mask_graph0(torch.nn.Module): def forward(self, arg0_1: "i32[]", arg1_1: "i32[]", arg2_1: "i32[]", arg3_1: "i32[]"): - full_default: "b8[]" = torch.ops.aten.full.default([], True, dtype = torch.bool, layout = torch.strided, device = device(type='cuda', index=0), pin_memory = False) + full_default: "b8[]" = torch.ops.aten.full.default([], True, dtype = torch.bool, layout = torch.strided, device = device(type='GPU_TYPE', index=0), pin_memory = False) return full_default """.replace( # noqa: B950 "GPU_TYPE", torch.device(device).type From e1f0a6994391d4be7ebb60b3b46b9bdcfc0aa906 Mon Sep 17 00:00:00 2001 From: PyTorch 
MergeBot Date: Wed, 10 Sep 2025 06:55:35 +0000 Subject: [PATCH 032/693] Revert "test fixing benchmarks (#162503)" This reverts commit 484c4093a87a3e6767e55ed553f95db8fc137442. Reverted https://github.com/pytorch/pytorch/pull/162503 on behalf of https://github.com/huydhn due to Sorry for reverting your change but it regresses CPU perf smoke test ([comment](https://github.com/pytorch/pytorch/pull/162503#issuecomment-3273554680)) --- .ci/pytorch/macos-test.sh | 4 ++-- benchmarks/dynamo/common.py | 10 ++-------- .../inductor/aoti_package/model_package_loader.cpp | 11 ++--------- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index 53f5b46714639..3437802da4eaa 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -386,10 +386,10 @@ elif [[ $TEST_CONFIG == *"perf_hf"* ]]; then test_hf_perf elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then test_timm_perf -elif [[ $TEST_CONFIG == *"aot_inductor_perf_smoketest"* ]]; then - test_aoti_torchbench_smoketest "${SHARD_NUMBER}" elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then test_torchbench_smoketest "${SHARD_NUMBER}" +elif [[ $TEST_CONFIG == *"aot_inductor_perf_smoketest"* ]]; then + test_aoti_torchbench_smoketest "${SHARD_NUMBER}" elif [[ $TEST_CONFIG == *"mps"* ]]; then test_python_mps elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py index 83d55682247e7..2901009f7c4d1 100644 --- a/benchmarks/dynamo/common.py +++ b/benchmarks/dynamo/common.py @@ -1424,7 +1424,7 @@ def load(cls, model, example_inputs, mode): torch.hpu.max_memory_allocated() - pre_clone_memory_used ) / 1e9 - inductor_configs = {"aot_inductor.package_constants_in_so": False} + inductor_configs = {} if mode == "max-autotune": inductor_configs["max_autotune"] = True ep = torch.export.export( @@ -1439,14 +1439,8 @@ def load(cls, model, example_inputs, mode): ep, inductor_configs=inductor_configs ) # type: ignore[arg-type] - compiled = torch._inductor.aoti_load_package(package_path) - compiled.load_constants( - {**ep.state_dict, **ep.constants}, - check_full_update=False, - user_managed=True, - ) cls.cache[key] = ( - compiled, + torch._inductor.aoti_load_package(package_path), clone_memory_used, ) diff --git a/torch/csrc/inductor/aoti_package/model_package_loader.cpp b/torch/csrc/inductor/aoti_package/model_package_loader.cpp index 1fae20572b923..aa8ef905d57aa 100644 --- a/torch/csrc/inductor/aoti_package/model_package_loader.cpp +++ b/torch/csrc/inductor/aoti_package/model_package_loader.cpp @@ -721,15 +721,8 @@ void AOTIModelPackageLoader::load_constants( for (const auto& it : constants_map) { if (fqn_to_constant_name.find(it.first) != fqn_to_constant_name.end()) { updated_constants_map.emplace(fqn_to_constant_name[it.first], it.second); - } else if (check_full_update) { - std::string constant_fqns = ""; - for (const auto& it2 : fqn_to_constant_name) { - constant_fqns += it2.first + ", "; - } - throw std::runtime_error( - "The constant with FQN " + it.first + - " was not found in the model. 
The available constants are: " + - constant_fqns); + } else { + throw std::runtime_error("Constant not found: " + it.first); } } From 98e22c8a693644c6d235d7a858dc411b1aefafa7 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 10 Sep 2025 07:05:10 +0000 Subject: [PATCH 033/693] Skip test_ind_worker_queue on Windows and macOS (flaky) (#162555) Fixes https://github.com/pytorch/pytorch/issues/68643 It was closed by the bot yesterday and the issue was still there https://github.com/pytorch/pytorch/actions/runs/17595694816/job/49989589647. It's better to just skip it directly in the code as this test has been disabled on Windows and MacOS since 2021 O_o Pull Request resolved: https://github.com/pytorch/pytorch/pull/162555 Approved by: https://github.com/clee2000 --- test/test_dataloader.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test_dataloader.py b/test/test_dataloader.py index 8c98181e8b99e..da0c120822448 100644 --- a/test/test_dataloader.py +++ b/test/test_dataloader.py @@ -25,6 +25,7 @@ from torch.testing._internal.common_utils import ( IS_CI, IS_JETSON, + IS_MACOS, IS_S390X, IS_SANDCASTLE, IS_WINDOWS, @@ -3472,6 +3473,10 @@ def _run_ind_worker_queue_test(self, batch_size, num_workers): if current_worker_idx == num_workers: current_worker_idx = 0 + @unittest.skipIf( + IS_WINDOWS or IS_MACOS, + "Flaky on Windows and MacOS https://github.com/pytorch/pytorch/issues/68643", + ) def test_ind_worker_queue(self): max_num_workers = None if hasattr(os, "sched_getaffinity"): From b2d8f6a6af09faa8590efcae1f8ef6f8d46e2ac8 Mon Sep 17 00:00:00 2001 From: FFFrog Date: Thu, 4 Sep 2025 19:23:18 +0800 Subject: [PATCH 034/693] [OpenReg] Update the docs about Accelerator Integration (#162046) Fix the issue describled by this [comment](https://github.com/pytorch/pytorch/pull/161845#discussion_r2317299390) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162046 Approved by: https://github.com/albanD --- docs/source/accelerator/index.md | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/docs/source/accelerator/index.md b/docs/source/accelerator/index.md index 4c604ba10b01a..68db62e075975 100644 --- a/docs/source/accelerator/index.md +++ b/docs/source/accelerator/index.md @@ -2,6 +2,10 @@ Since PyTorch 2.1, the community has made significant progress in streamlining the process of integrating new accelerators into the PyTorch ecosystem. These improvements include, but are not limited to: refinements to the `PrivateUse1` Dispatch Key, the introduction and enhancement of core subsystem extension mechanisms, and the device-agnostic refactoring of key modules (e.g., `torch.accelerator`, `memory management`). Taken together, these advances provide the foundation for a **robust**, **flexible**, and **developer-friendly** pathway for accelerator integration. +```{note} +This guide is a work in progress. For more details, please refer to the [roadmap](https://github.com/pytorch/pytorch/issues/158917). +``` + ## Why Does This Matter? This integration pathway offers several major benefits: @@ -10,16 +14,6 @@ This integration pathway offers several major benefits: * **Future-proofing**: This is the default integration path for all future PyTorch features, meaning that as new modules and features are added, they will automatically support scaling to new accelerators if this path is followed. 
* **Autonomy**: Vendors maintain full control over their accelerator integration timelines, enabling fast iteration cycles and reducing reliance on upstream coordination. -## About This Document - -This guide aims to provide a **comprehensive overview of the modern integration pathway** for new accelerator in PyTorch. It walks through the full integration surface, from low-level device primitives to higher-level domain modules like compilation and quantization. The structure follows a **modular and scenario-driven approach**, where each topic is paired with corresponding code examples from [torch_openreg][OpenReg URL], an official reference implementation. - -The goal is to help developers: - -* Understand the full scope of accelerator integration; -* Follow best practices to quickly launch new accelerators; -* Avoid common pitfalls through clear, targeted examples. - ## Target Audience This document is intended for: @@ -27,20 +21,22 @@ This document is intended for: * **Accelerator Developers** who are integrating accelerator into PyTorch; * **Advanced PyTorch Users** interested in the inner workings of key modules; -## Quick Overview +## About This Document -This document outlines the key processes and practical scenarios involved in integrating new devices into PyTorch, providing developers with a comprehensive and detailed guide for bringing up new backends. The discussion is structured around four major axes: +This guide aims to provide a **comprehensive overview of the modern integration pathway** for new accelerator in PyTorch. It walks through the full integration surface, from low-level device primitives to higher-level domain modules like compilation and quantization. The structure follows a **modular and scenario-driven approach**, where each topic is paired with corresponding code examples from [torch_openreg][OpenReg URL], an official reference implementation, and this series is structured around four major axes: * **Runtime**: Covers core components such as Event, Stream, Memory, Generator, Guard, Hooks, as well as the supporting C++ scaffolding. * **Operators**: Involve the minimum necessary set of operators, forward and backward operators, fallback operators, fallthroughs, STUBs, etc. in both C++ and Python implementations. * **Python Frontend**: Focuses on Python bindings for modules and device-agnostic APIs. * **High-level Modules**: Explores integration with major subsystems such as `AMP`, `Compiler`, `ONNX`, and `Distributed` and so on. -Next, we will officially embark on the integration journey for a new PyTorch accelerator. +The goal is to help developers: -```{note} -This guide is a work in progress. For more details, please refer to the [roadmap](https://github.com/pytorch/pytorch/issues/158917). -``` +* Understand the full scope of accelerator integration; +* Follow best practices to quickly launch new accelerators; +* Avoid common pitfalls through clear, targeted examples. + +Next, we will delve into each chapter of this guide. Each chapter focuses on a key aspect of integration, providing detailed explanations and illustrative examples. Since some chapters build upon previous ones, readers are encouraged to follow the sequence to achieve a more coherent understanding. 
```{toctree} :glob: From be8095b07f789f3b79564b7e2a8f6dfb4f4c9103 Mon Sep 17 00:00:00 2001 From: fduwjj Date: Tue, 9 Sep 2025 22:00:28 -0700 Subject: [PATCH 035/693] [DeviceMesh] Clarifying flatten use case (#161311) Since we are in the middle of big refactoring and simplying the bookkeeping for device mesh. We found an interesting bug inside DeviceMesh flatten implementation. Here is the finding: 1. In unit test, we assume users can call `dp_cp_mesh._flatten()` many times but no backend will be created (aka cached). 2. From the implementation of slicing, we actually throw exception erroring out doing the `_flatten` more than once. But there is bug which was partially fixed in https://github.com/pytorch/pytorch/pull/160709 but it does not fixed the check for the case when we call the `_flatten` twice. What's more important question to ask is, what behavior we want for `_flatten`? Do we allow calling `_flatten` multiple times (with same mesh_name)? I think we should, why? 1. We allow slicing for the same mesh_name or name_list multiple times, and we cache the PG behinds. Although we will return a new device mesh object everytime, when we compare them they are all the same (according to __eq__). 2. We actually cached the flattened mesh today inside `root_to_flatten_mapping` and actually do the early return but that line will never be reached if we error out before that. Also we should allow a no-op for flatten a 1D mesh into itself's mesh_dim_name, I added a unit test for it. Pull Request resolved: https://github.com/pytorch/pytorch/pull/161311 Approved by: https://github.com/fegin --- test/distributed/test_device_mesh.py | 16 ++++++++++++++++ torch/distributed/device_mesh.py | 15 ++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/test/distributed/test_device_mesh.py b/test/distributed/test_device_mesh.py index 693a63bd5d4ed..98557c9fe941a 100644 --- a/test/distributed/test_device_mesh.py +++ b/test/distributed/test_device_mesh.py @@ -825,6 +825,15 @@ def test_get_item_3d_noncontiguous_slicing(self): ): mesh_3d["cp", "dp"] + @with_comms + def test_flatten_mesh_1d(self): + mesh_shape = (4,) + mesh_dim_names = ("default",) + mesh_1d = init_device_mesh( + self.device_type, mesh_shape, mesh_dim_names=mesh_dim_names + ) + mesh_1d._flatten() + @with_comms def test_flatten_mesh_3d(self): mesh_shape = (2, 2, 2) @@ -833,6 +842,13 @@ def test_flatten_mesh_3d(self): self.device_type, mesh_shape, mesh_dim_names=mesh_dim_names ) + # Test flatten into an existing mesh_dim_name inside the mesh + with self.assertRaisesRegex( + RuntimeError, + "already exists for submesh of the DeviceMesh", + ): + mesh_3d._flatten("dp") + # Test flatten contiguous dims dp_cp_mesh = mesh_3d["dp", "cp"] flattened_dp_cp_mesh = dp_cp_mesh._flatten() diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py index 1907f730c2e64..3a9363090bf71 100644 --- a/torch/distributed/device_mesh.py +++ b/torch/distributed/device_mesh.py @@ -7,7 +7,7 @@ import warnings from collections.abc import Iterator from functools import reduce -from itertools import chain, zip_longest +from itertools import zip_longest from typing import Optional, TYPE_CHECKING, Union import torch @@ -185,12 +185,15 @@ def create_flatten_mesh( if not mesh_dim_name: mesh_dim_name = "_".join(not_none(device_mesh.mesh_dim_names)) + # Flatten a 1D device mesh into its original mesh_dim_name will return itself. 
+ if device_mesh.ndim == 1 and mesh_dim_name in not_none( + device_mesh.mesh_dim_names + ): + return device_mesh + # Check whether the mesh_dim_name for flattened mesh is valid. self.flatten_name_to_root_dims.setdefault(root_mesh, {}) - invalid_dim_names = chain( - list(not_none(root_mesh.mesh_dim_names)), - *self.flatten_name_to_root_dims[root_mesh].keys(), - ) + invalid_dim_names = not_none(root_mesh.mesh_dim_names) if mesh_dim_name in invalid_dim_names: raise RuntimeError( f"{mesh_dim_name} already exists for submesh of the {root_mesh}. ", @@ -199,8 +202,6 @@ def create_flatten_mesh( ) # Quick return if the flatten mesh has been created before. - # TODO: If we decide to restrict flatten initialization once, we should remove - # this check and throw an error if the flatten mesh is already created before. if ( root_mesh in self.root_to_flatten_mapping and mesh_dim_name in self.root_to_flatten_mapping[root_mesh] From 26b3ae58908becbb03b28636f7384d2972a8c9a5 Mon Sep 17 00:00:00 2001 From: Robert Hardwick Date: Tue, 9 Sep 2025 22:27:07 +0000 Subject: [PATCH 036/693] Move prioritized text linker optimization code from setup.py to cmake (#160078) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note. This is a replica PR of #155901 which will be closed. I had to create a new PR in order to add it into my ghstack as there are some later commits which depend on it. ### Summary 🚀 This PR moves the prioritized text linker optimization from setup.py to cmake ( and enables by default on Linux aarch64 systems ) This change consolidates what was previously manual CI logic into a single location (cmake), ensuring consistent behavior across local builds, CI pipelines, and developer environments. ### Motivation Prioritized text layout has measurable performance benefits on Arm systems by reducing code padding and improving cache utilization. This optimization was previously triggered manually via CI scripts (.ci/aarch64_linux/aarch64_ci_build.sh) or user-set environment variables. By detecting the target architecture within setup.py, this change enables the optimization automatically where applicable, improving maintainability and usability. Note: Due to ninja/cmake graph generation issues we cannot apply the linker file globally to all targets to the targets must be manually defined. See CMakeLists.txt the main libraries torch_python, torch, torch_cpu, torch_cuda, torch_xpu have been targetted which should be enough to maintain the performance benefits outlined above. Co-authored-by: Usamah Zaheer Pull Request resolved: https://github.com/pytorch/pytorch/pull/160078 Approved by: https://github.com/seemethere --- .ci/aarch64_linux/aarch64_ci_build.sh | 6 +- .ci/aarch64_linux/aarch64_wheel_ci_build.py | 2 +- .gitignore | 3 + CMakeLists.txt | 66 +++++++++++++++++++ cmake/Summary.cmake | 1 + cmake/public/utils.cmake | 20 ++++++ setup.py | 24 ------- tools/setup_helpers/generate_linker_script.py | 23 +++++++ 8 files changed, 116 insertions(+), 29 deletions(-) diff --git a/.ci/aarch64_linux/aarch64_ci_build.sh b/.ci/aarch64_linux/aarch64_ci_build.sh index bf8bab6dde232..9878c4afa3bfb 100644 --- a/.ci/aarch64_linux/aarch64_ci_build.sh +++ b/.ci/aarch64_linux/aarch64_ci_build.sh @@ -31,8 +31,7 @@ pip install -r /pytorch/requirements.txt pip install auditwheel==6.2.0 wheel if [ "$DESIRED_CUDA" = "cpu" ]; then echo "BASE_CUDA_VERSION is not set. Building cpu wheel." 
- #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files - USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn + python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn else echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA" export USE_SYSTEM_NCCL=1 @@ -49,6 +48,5 @@ else export USE_NVIDIA_PYPI_LIBS=1 fi - #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files - USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda + python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda fi diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py index 4bb9c64ea7772..e63ce012652aa 100755 --- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py +++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py @@ -319,7 +319,7 @@ def parse_arguments(): ).decode() print("Building PyTorch wheel") - build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " + build_vars = "" # MAX_JOB=5 is not required for CPU backend (see commit 465d98b) if enable_cuda: build_vars += "MAX_JOBS=5 " diff --git a/.gitignore b/.gitignore index d1fa4cd3caf28..774ce14f17d63 100644 --- a/.gitignore +++ b/.gitignore @@ -259,6 +259,9 @@ gen .pytest_cache aten/build/* +# Linker scripts for prioritized text optimization +cmake/linker_script.ld + # Bram plsdontbreak diff --git a/CMakeLists.txt b/CMakeLists.txt index dc5405ecef235..105e0db67ecd1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -379,6 +379,13 @@ cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler" OFF "USE_CUDA" OFF) cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON "CPU_AARCH64" OFF) +# prioritized text linker, ON by default for AArch64+Linux, option visible to all AArch64, x86 and ppc64le. +set(USE_PRIORITIZED_TEXT_DEFAULT OFF) +if(LINUX AND CPU_AARCH64) + set(USE_PRIORITIZED_TEXT_DEFAULT ON) +endif() +cmake_dependent_option(USE_PRIORITIZED_TEXT_FOR_LD "Use prioritized text linker for ld." + "${USE_PRIORITIZED_TEXT_DEFAULT}" "CPU_INTEL OR CPU_AARCH64 OR CPU_POWER" OFF) option(USE_MIMALLOC "Use mimalloc" OFF) # Enable third party mimalloc library to improve memory allocation performance @@ -657,6 +664,11 @@ endif(MSVC) string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all") +# Set linker max-page-size to 64KiB on AArch64 Linux +if(LINUX AND CPU_AARCH64) + add_link_options_if_supported("-z,max-page-size=0x10000") +endif() + # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not # applicable to mobile are disabled by this variable. 
Setting # `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it @@ -1421,3 +1433,57 @@ if(BUILD_BUNDLE_PTXAS AND USE_CUDA) install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas" DESTINATION "${CMAKE_INSTALL_BINDIR}") endif() + +if(USE_PRIORITIZED_TEXT_FOR_LD) + add_compile_options( + $<$:-ffunction-sections> + $<$:-fdata-sections> + ) + set(LINKER_SCRIPT_FILE_OUT "${CMAKE_SOURCE_DIR}/cmake/linker_script.ld") + set(LINKER_SCRIPT_FILE_IN "${CMAKE_SOURCE_DIR}/cmake/prioritized_text.txt") + + add_custom_command( + OUTPUT "${LINKER_SCRIPT_FILE_OUT}" + COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py --filein "${LINKER_SCRIPT_FILE_IN}" --fout "${LINKER_SCRIPT_FILE_OUT}" + DEPENDS ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py "${LINKER_SCRIPT_FILE_IN}" + COMMENT "Generating prioritized text linker files" + VERBATIM + ) + + add_custom_target(generate_linker_script DEPENDS "${LINKER_SCRIPT_FILE_OUT}") + + if(BUILD_PYTHON) + set(LINKER_OPT_TARGETS torch_python) + endif() + + if(NOT BUILD_LIBTORCHLESS) + list(APPEND LINKER_OPT_TARGETS torch_cpu c10) + if(USE_CUDA) + list(APPEND LINKER_OPT_TARGETS torch_cuda c10_cuda) + endif() + if(USE_XPU) + list(APPEND LINKER_OPT_TARGETS torch_xpu c10_xpu) + endif() + if(USE_ROCM) + list(APPEND LINKER_OPT_TARGETS torch_hip c10_hip) + endif() + endif() + + foreach(tgt IN LISTS LINKER_OPT_TARGETS) + if(TARGET ${tgt}) + add_dependencies("${tgt}" generate_linker_script) + target_link_options_if_supported(${tgt} "-T,${LINKER_SCRIPT_FILE_OUT}") + set_property(TARGET ${tgt} APPEND PROPERTY LINK_DEPENDS "${LINKER_SCRIPT_FILE_OUT}") + else() + message(WARNING "Requested target '${tgt}' for linker script optimization was not found.") + endif() + endforeach() + +else() + if(LINUX AND CPU_AARCH64) + message(WARNING [[ + It is strongly recommend to enable linker script optimization for all AArch64 Linux builds. 
+ To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1 + ]]) + endif() +endif() \ No newline at end of file diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index ffd4b5298a890..2e2fd370a994f 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -158,6 +158,7 @@ function(caffe2_print_configuration_summary) if(${USE_KLEIDIAI}) message(STATUS " USE_KLEIDIAI : ${USE_KLEIDIAI}") endif() + message(STATUS " USE_PRIORITIZED_TEXT_FOR_LD : ${USE_PRIORITIZED_TEXT_FOR_LD}") message(STATUS " USE_UCC : ${USE_UCC}") if(${USE_UCC}) message(STATUS " USE_SYSTEM_UCC : ${USE_SYSTEM_UCC}") diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake index 68e66bb3fc386..c96ffebf858e3 100644 --- a/cmake/public/utils.cmake +++ b/cmake/public/utils.cmake @@ -482,6 +482,7 @@ function(torch_update_find_cuda_flags) endfunction() include(CheckCXXCompilerFlag) +include(CheckLinkerFlag) ############################################################################## # CHeck if given flag is supported and append it to provided outputvar @@ -511,3 +512,22 @@ function(target_compile_options_if_supported target flag) target_compile_options(${target} PRIVATE ${flag}) endif() endfunction() + +# Check if a global link option is supported +function(add_link_options_if_supported flag) + check_linker_flag(C "LINKER:${flag}" _supported) + if("${_supported}") + add_link_options("LINKER:${flag}") + else() + message(WARNING "Attempted to use unsupported link option : ${flag}.") + endif() +endfunction() + +function(target_link_options_if_supported tgt flag) + check_linker_flag(C "LINKER:${flag}" _supported) + if("${_supported}") + target_link_options("${tgt}" PRIVATE "LINKER:${flag}") + else() + message(WARNING "Attempted to use unsupported link option : ${flag}.") + endif() +endfunction() \ No newline at end of file diff --git a/setup.py b/setup.py index c0523a1b5c601..2bb63a93cec83 100644 --- a/setup.py +++ b/setup.py @@ -227,9 +227,6 @@ # Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free. # By default, It is only enabled on Windows. # -# USE_PRIORITIZED_TEXT_FOR_LD -# Uses prioritized text form cmake/prioritized_text.txt for LD -# # BUILD_LIBTORCH_WHL # Builds libtorch.so and its dependencies as a wheel # @@ -323,7 +320,6 @@ IS_LINUX, IS_WINDOWS, ) -from tools.setup_helpers.generate_linker_script import gen_linker_script def str2bool(value: str | None) -> bool: @@ -1627,26 +1623,6 @@ def main() -> None: if BUILD_PYTHON_ONLY: install_requires += [f"{LIBTORCH_PKG_NAME}=={TORCH_VERSION}"] - if str2bool(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD")): - gen_linker_script( - filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld" - ) - linker_script_path = os.path.abspath("cmake/linker_script.ld") - os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}" - os.environ["CFLAGS"] = ( - os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections" - ) - os.environ["CXXFLAGS"] = ( - os.getenv("CXXFLAGS", "") + " -ffunction-sections -fdata-sections" - ) - elif platform.system() == "Linux" and platform.processor() == "aarch64": - print_box( - """ - WARNING: we strongly recommend enabling linker script optimization for ARM + CUDA. - To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1 - """ - ) - # Parse the command line and check the arguments before we proceed with # building deps and setup. We need to set values so `--help` works. 
dist = Distribution() diff --git a/tools/setup_helpers/generate_linker_script.py b/tools/setup_helpers/generate_linker_script.py index e66fc197062ad..b5a7a4ce7dec9 100644 --- a/tools/setup_helpers/generate_linker_script.py +++ b/tools/setup_helpers/generate_linker_script.py @@ -1,5 +1,7 @@ +import argparse import os import subprocess +from pathlib import Path def gen_linker_script( @@ -28,6 +30,10 @@ def gen_linker_script( assert len(text_line_start) == 1, "The linker script has multiple text sections!" text_line_start = text_line_start[0] + # ensure that parent directory exists before writing + fout = Path(fout) + fout.parent.mkdir(parents=True, exist_ok=True) + with open(fout, "w") as f: for lineid, line in enumerate(linker_script_lines): if lineid == text_line_start + 2: @@ -36,3 +42,20 @@ def gen_linker_script( f.write(f" .text.{plines}\n") f.write(" )\n") f.write(f"{line}\n") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Generate linker file based on prioritized symbols. Used for link-time optimization.", + ) + parser.add_argument( + "--filein", + help="Path to prioritized_text.txt input file", + default=argparse.SUPPRESS, + ) + parser.add_argument( + "--fout", help="Output path for linker ld file", default=argparse.SUPPRESS + ) + # convert args to a dict to pass to gen_linker_script + kwargs = vars(parser.parse_args()) + gen_linker_script(**kwargs) From f0ae3a57f62087e0cb552db1df75f6ebf7976b88 Mon Sep 17 00:00:00 2001 From: Menglu Yu Date: Wed, 10 Sep 2025 09:49:01 +0000 Subject: [PATCH 037/693] [Optimus] Add batch dropout pattern (#162443) Summary: We observe dropout pattern in AFOC, such add a new pattern to Optimus Test Plan: ``` buck2 test 'fbcode//mode/dev-nosan' fbcode//caffe2/test/inductor:group_batch_fusion -- test_batch_dropout_pre_grad_fusion ``` Buck UI: https://www.internalfb.com/buck2/2c899fb5-6e8b-43eb-8fb3-b53abfbfa6d9 Test UI: https://www.internalfb.com/intern/testinfra/testrun/15762598805248688 Network: Up: 0B Down: 0B (reSessionID-bfbb9e6a-7e2a-425a-a027-b44282cef419) Executing actions. Remaining 0/3 1.3s exec time total Command: test. Finished 2 local Time elapsed: 1:22.3s Tests finished: Pass 2. Fail 0. Fatal 0. Skip 0. 
Build failure 0 ### E2E baseline f791163796 proposal f793225207 Rollback Plan: Differential Revision: D81981264 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162443 Approved by: https://github.com/Yuzhen11, https://github.com/mlazos --- test/inductor/test_group_batch_fusion.py | 50 +++++++++++++++++++ .../_inductor/fx_passes/group_batch_fusion.py | 6 +++ 2 files changed, 56 insertions(+) diff --git a/test/inductor/test_group_batch_fusion.py b/test/inductor/test_group_batch_fusion.py index 090a7e8e29d3f..01c9962e0087b 100644 --- a/test/inductor/test_group_batch_fusion.py +++ b/test/inductor/test_group_batch_fusion.py @@ -286,6 +286,38 @@ def forward(self, x): return torch.stack((stack_input, stack_other), dim=0) +class TestDropout(torch.nn.Module): + def __init__(self, device): + super().__init__() + self.device = device + + def forward( + self, x: torch.Tensor + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + split = x.split([20, 20, 20, 20, 20], 1) + getitem_1 = split[0] + getitem_2 = split[1] + getitem_3 = split[2] + getitem_4 = split[3] + getitem_5 = split[4] + dropout = torch.nn.functional.dropout( + getitem_1, p=0.05, training=True, inplace=False + ) + dropout_1 = torch.nn.functional.dropout( + getitem_2, p=0.05, training=True, inplace=False + ) + dropout_2 = torch.nn.functional.dropout( + getitem_3, p=0.05, training=True, inplace=False + ) + dropout_3 = torch.nn.functional.dropout( + getitem_4, p=0.05, training=True, inplace=False + ) + dropout_4 = torch.nn.functional.dropout( + getitem_5, p=0.05, training=True, inplace=False + ) + return (dropout, dropout_1, dropout_2, dropout_3, dropout_4) + + class TestGroupBatchFusion(TestCase): def compare_dict_tensors(self, ref_dict, res_dict, rtol=1e-3, atol=1e-3): if len(set(ref_dict.keys())) != len(set(res_dict.keys())): @@ -581,6 +613,24 @@ def test_math_op_fusion(self): self.assertTrue(torch.allclose(ref, res)) counters.clear() + @requires_gpu() + @torch._inductor.config.patch( + pre_grad_fusion_options={ + "normalization_pass": {}, + "batch_dropout": {}, + } + ) + def test_batch_dropout_pre_grad_fusion(self): + counters.clear() + module = TestDropout(GPU_TYPE) + input = [torch.randn(10, 100, requires_grad=True, device=GPU_TYPE)] + traced = torch.compile(module) + module(*input) + traced(*input) + self.assertEqual(counters["inductor"]["normalization_pass"], 1) + self.assertEqual(counters["inductor"]["batch_dropout"], 1) + counters.clear() + class TestBMMFusionModule(torch.nn.Module): def __init__(self) -> None: diff --git a/torch/_inductor/fx_passes/group_batch_fusion.py b/torch/_inductor/fx_passes/group_batch_fusion.py index 3f8ebe0a7d57d..f081374585ee5 100644 --- a/torch/_inductor/fx_passes/group_batch_fusion.py +++ b/torch/_inductor/fx_passes/group_batch_fusion.py @@ -1141,6 +1141,12 @@ def __init__(self, **kwargs): super().__init__(torch.clamp, **kwargs) +@register_fusion("batch_dropout") +class BatchDropoutPreGradFusion(BatchMathOpsPreGradFusion): + def __init__(self, **kwargs): + super().__init__(torch.nn.functional.dropout, **kwargs) + + @register_fusion("batch_aten_tanh", pre_grad=False) class BatchTanhPostGradFusion(BatchPointwiseOpsPostGradFusion): def __init__(self, **kwargs) -> None: From d9832d8425376971c8f4ee790383a683cbb80d74 Mon Sep 17 00:00:00 2001 From: dolpm <34420038+dolpm@users.noreply.github.com> Date: Wed, 10 Sep 2025 09:49:08 +0000 Subject: [PATCH 038/693] [triton][export] serialization in internal path + unit tests (#162200) Summary: will package triton artifacts to be 
runnable in nativert if wrappers exist. Test Plan: unit tests Rollback Plan: Differential Revision: D81368559 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162200 Approved by: https://github.com/angelayi --- test/export/test_serialize.py | 3 +- torch/_export/serde/serialize.py | 77 ++++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/test/export/test_serialize.py b/test/export/test_serialize.py index ebc6e6d0672ea..f96fc4ade4377 100644 --- a/test/export/test_serialize.py +++ b/test/export/test_serialize.py @@ -673,7 +673,7 @@ def forward(self, x, y): kwargs.append(arg.arg) self.assertEqual(len(args), 4) - self.assertEqual(len(kwargs), 4) + self.assertEqual(len(kwargs), 5) for i in range(3): self.assertIsNotNone(args[i].as_tensor) @@ -686,6 +686,7 @@ def forward(self, x, y): self.assertEqual( kwargs[3].as_int, 8 if isinstance(m, MyModelAutotune) else 4 ) # num warps + self.assertEqual(kwargs[4].as_int, 0) # shared mem bytes self.assertEqual(len(triton_node.outputs), 1) self.assertIsNotNone(triton_node.outputs[0].as_tensors) diff --git a/torch/_export/serde/serialize.py b/torch/_export/serde/serialize.py index 07674b5702947..8cc139f7cf3d3 100644 --- a/torch/_export/serde/serialize.py +++ b/torch/_export/serde/serialize.py @@ -510,6 +510,59 @@ def __new__(metacls, name, bases, classdict): return type.__new__(metacls, name, bases, dict(classdict)) +def get_triton_kernel_and_cache_entry(node: torch.fx.Node): + assert ( + node.target + is torch._higher_order_ops.triton_kernel_wrap.triton_kernel_wrapper_functional + ) + + assert has_triton(), "triton required to serialize triton kernels" + from triton.runtime.autotuner import Autotuner + + assert isinstance(node.kwargs["kernel_idx"], int) + kernel = torch._higher_order_ops.triton_kernel_wrap.kernel_side_table.get_kernel( + node.kwargs["kernel_idx"] + ) + + kNumWarpsDefault = 4 + + # currently we only support specialization of + # num_warps -- so search for the entry that + # matches the value from the associated kernel + if isinstance(kernel, Autotuner): + assert len(kernel.configs) == 1 + num_warps = kernel.configs[0].num_warps + assert kernel.configs[0].num_ctas == 1, ( + "serialization only supports num_ctas == 1" + ) + kernel = kernel.fn + else: + num_warps = kNumWarpsDefault + + if hasattr(kernel, "device_caches"): + caches = kernel.device_caches + assert len(caches.keys()) == 1 + cache = next(iter(caches.values()))[0] + elif hasattr(kernel, "cache"): + # old path, still used for cpu triton builds + caches = kernel.cache + assert len(caches.keys()) == 1 + cache = next(iter(caches.values())) + else: + raise AssertionError(f"kernel caches not found for kernel {kernel.__name__}") + + # can also get num_warps, num_ctas, etc. 
from here ig + if len(cache.keys()) == 1: + return kernel, next(iter(cache.values())) + else: + for cache_entry in cache.values(): + if cache_entry.metadata.num_warps == num_warps: + return kernel, cache_entry + raise AssertionError( + f"couldn't find a kernel cache entry with metadata matching the autotuner configs for kernel {kernel.__name__}" + ) + + @final class GraphModuleSerializer(metaclass=Final): def __init__( @@ -676,8 +729,8 @@ def serialize_tensor_list_output(node): node.target is torch._higher_order_ops.triton_kernel_wrap.triton_kernel_wrapper_functional ): - assert has_triton(), "triton required to serialize triton kernels" - from triton.runtime.autotuner import Autotuner + kernel, kernel_cache_entry = get_triton_kernel_and_cache_entry(node) + kernel_cache_metadata = kernel_cache_entry.metadata meta_val = node.meta["val"] assert isinstance(meta_val, dict) @@ -685,21 +738,6 @@ def serialize_tensor_list_output(node): output_keys = meta_val.keys() output_indices = [] - assert isinstance(node.kwargs["kernel_idx"], int) - kernel = torch._higher_order_ops.triton_kernel_wrap.kernel_side_table.get_kernel( - node.kwargs["kernel_idx"] - ) - - if isinstance(kernel, Autotuner): - assert len(kernel.configs) == 1 - num_warps = kernel.configs[0].num_warps - assert kernel.configs[0].num_ctas == 1, ( - "serialization only supports num_ctas == 1" - ) - kernel = kernel.fn - else: - num_warps = 4 - constexpr_keys = set() for p in kernel.params: if p.is_constexpr: @@ -732,9 +770,12 @@ def serialize_tensor_list_output(node): "name": kernel.fn.__name__, "grid": node.kwargs["grid"][0], "output_indices": output_indices, - "num_warps": num_warps, + "num_warps": kernel_cache_metadata.num_warps, } + if hasattr(kernel_cache_metadata, "shared"): + kwargs_new["shared_memory_bytes"] = kernel_cache_metadata.shared + ex_node = Node( target=self.serialize_operator(node.target), inputs=self.serialize_hoo_inputs(args_new, kwargs_new), From b494547f0bd6cb1ce5d8d104cb419802434c9c08 Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Wed, 10 Sep 2025 09:08:29 +0200 Subject: [PATCH 039/693] Make functorch notebook symlinks PEP 517 valid (#157813) Pull Request resolved: https://github.com/pytorch/pytorch/pull/157813 Approved by: https://github.com/zou3519, https://github.com/atalman --- functorch/docs/source/notebooks | 1 - functorch/{ => docs/source}/notebooks/_src/plot_ensembling.py | 0 .../source}/notebooks/_src/plot_jacobians_and_hessians.py | 0 .../source}/notebooks/_src/plot_per_sample_gradients.py | 0 .../{ => docs/source}/notebooks/aot_autograd_optimizations.ipynb | 0 functorch/{ => docs/source}/notebooks/ensembling.ipynb | 0 functorch/{ => docs/source}/notebooks/jacobians_hessians.ipynb | 0 functorch/{ => docs/source}/notebooks/minifier.ipynb | 0 .../{ => docs/source}/notebooks/neural_tangent_kernels.ipynb | 0 functorch/{ => docs/source}/notebooks/per_sample_grads.ipynb | 0 functorch/{ => docs/source}/notebooks/whirlwind_tour.ipynb | 0 functorch/notebooks | 1 + 12 files changed, 1 insertion(+), 1 deletion(-) delete mode 120000 functorch/docs/source/notebooks rename functorch/{ => docs/source}/notebooks/_src/plot_ensembling.py (100%) rename functorch/{ => docs/source}/notebooks/_src/plot_jacobians_and_hessians.py (100%) rename functorch/{ => docs/source}/notebooks/_src/plot_per_sample_gradients.py (100%) rename functorch/{ => docs/source}/notebooks/aot_autograd_optimizations.ipynb (100%) rename functorch/{ => docs/source}/notebooks/ensembling.ipynb (100%) rename functorch/{ => 
docs/source}/notebooks/jacobians_hessians.ipynb (100%) rename functorch/{ => docs/source}/notebooks/minifier.ipynb (100%) rename functorch/{ => docs/source}/notebooks/neural_tangent_kernels.ipynb (100%) rename functorch/{ => docs/source}/notebooks/per_sample_grads.ipynb (100%) rename functorch/{ => docs/source}/notebooks/whirlwind_tour.ipynb (100%) create mode 120000 functorch/notebooks diff --git a/functorch/docs/source/notebooks b/functorch/docs/source/notebooks deleted file mode 120000 index d4082256dcfe3..0000000000000 --- a/functorch/docs/source/notebooks +++ /dev/null @@ -1 +0,0 @@ -../../notebooks/ \ No newline at end of file diff --git a/functorch/notebooks/_src/plot_ensembling.py b/functorch/docs/source/notebooks/_src/plot_ensembling.py similarity index 100% rename from functorch/notebooks/_src/plot_ensembling.py rename to functorch/docs/source/notebooks/_src/plot_ensembling.py diff --git a/functorch/notebooks/_src/plot_jacobians_and_hessians.py b/functorch/docs/source/notebooks/_src/plot_jacobians_and_hessians.py similarity index 100% rename from functorch/notebooks/_src/plot_jacobians_and_hessians.py rename to functorch/docs/source/notebooks/_src/plot_jacobians_and_hessians.py diff --git a/functorch/notebooks/_src/plot_per_sample_gradients.py b/functorch/docs/source/notebooks/_src/plot_per_sample_gradients.py similarity index 100% rename from functorch/notebooks/_src/plot_per_sample_gradients.py rename to functorch/docs/source/notebooks/_src/plot_per_sample_gradients.py diff --git a/functorch/notebooks/aot_autograd_optimizations.ipynb b/functorch/docs/source/notebooks/aot_autograd_optimizations.ipynb similarity index 100% rename from functorch/notebooks/aot_autograd_optimizations.ipynb rename to functorch/docs/source/notebooks/aot_autograd_optimizations.ipynb diff --git a/functorch/notebooks/ensembling.ipynb b/functorch/docs/source/notebooks/ensembling.ipynb similarity index 100% rename from functorch/notebooks/ensembling.ipynb rename to functorch/docs/source/notebooks/ensembling.ipynb diff --git a/functorch/notebooks/jacobians_hessians.ipynb b/functorch/docs/source/notebooks/jacobians_hessians.ipynb similarity index 100% rename from functorch/notebooks/jacobians_hessians.ipynb rename to functorch/docs/source/notebooks/jacobians_hessians.ipynb diff --git a/functorch/notebooks/minifier.ipynb b/functorch/docs/source/notebooks/minifier.ipynb similarity index 100% rename from functorch/notebooks/minifier.ipynb rename to functorch/docs/source/notebooks/minifier.ipynb diff --git a/functorch/notebooks/neural_tangent_kernels.ipynb b/functorch/docs/source/notebooks/neural_tangent_kernels.ipynb similarity index 100% rename from functorch/notebooks/neural_tangent_kernels.ipynb rename to functorch/docs/source/notebooks/neural_tangent_kernels.ipynb diff --git a/functorch/notebooks/per_sample_grads.ipynb b/functorch/docs/source/notebooks/per_sample_grads.ipynb similarity index 100% rename from functorch/notebooks/per_sample_grads.ipynb rename to functorch/docs/source/notebooks/per_sample_grads.ipynb diff --git a/functorch/notebooks/whirlwind_tour.ipynb b/functorch/docs/source/notebooks/whirlwind_tour.ipynb similarity index 100% rename from functorch/notebooks/whirlwind_tour.ipynb rename to functorch/docs/source/notebooks/whirlwind_tour.ipynb diff --git a/functorch/notebooks b/functorch/notebooks new file mode 120000 index 0000000000000..cacf787bdade8 --- /dev/null +++ b/functorch/notebooks @@ -0,0 +1 @@ +docs/source/notebooks \ No newline at end of file From 
337fe1079dfec12f019e9f74512b5f546abcb8d5 Mon Sep 17 00:00:00 2001 From: Yiming Zhou Date: Wed, 10 Sep 2025 11:35:44 +0000 Subject: [PATCH 040/693] [nativert] AOTI delegate with flat inputs and outputs (#162538) Summary: `executorch_call_delegate` should have flattened inputs and outputs. So that it can be correctly serialized and the input/output specs are consistent with runtime. Test Plan: CI Rollback Plan: Differential Revision: D82064354 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162538 Approved by: https://github.com/dolpm --- torch/nativert/backends/lower_utils.py | 55 ++++++++++++++++++- .../nativert/backends/lowered_aoti_module.py | 4 +- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/torch/nativert/backends/lower_utils.py b/torch/nativert/backends/lower_utils.py index 2b337f4f2c9d4..9bbd3aad9b701 100644 --- a/torch/nativert/backends/lower_utils.py +++ b/torch/nativert/backends/lower_utils.py @@ -1,4 +1,7 @@ +import types + import torch +import torch.utils._pytree as pytree from torch.export import ExportedProgram from torch.export.pt2_archive._package import AOTI_FILES, package_pt2 from torch.types import FileLike @@ -6,6 +9,38 @@ from .lowered_aoti_module import LoweredBackendModule +def get_new_ep_with_flat_inputs_outputs(ep: ExportedProgram) -> ExportedProgram: + class FlattenedModule(torch.nn.Module): + def __init__( + self, + original_module: torch.fx.GraphModule, + in_spec: pytree.TreeSpec, + out_spec: pytree.TreeSpec, + ) -> None: + super().__init__() + self.original_module = original_module + self.in_spec = in_spec + self.out_spec = out_spec + + def forward(self, *flat_inputs): # type: ignore[no-untyped-def] + # Unflatten inputs to original structure + inputs = pytree.tree_unflatten(flat_inputs, self.in_spec) + args, kwargs = inputs + outputs = self.original_module(*args, **kwargs) + # Flatten outputs + flat_outputs, _ = pytree.tree_flatten(outputs) + return tuple(flat_outputs) + + flattened_module = FlattenedModule( + ep.module(), ep.call_spec.in_spec, ep.call_spec.out_spec + ) + args, kwargs = ep.example_inputs + flat_inputs, _ = pytree.tree_flatten((args, kwargs)) + flat_ep = torch.export.export(flattened_module, tuple(flat_inputs)) + + return flat_ep + + def lower_exported_program( exported_program: ExportedProgram, model_name: str, backend_id: str ) -> tuple[ExportedProgram, AOTI_FILES]: @@ -14,15 +49,31 @@ def lower_exported_program( with the `executorch_call_delegate` HOP """ args, kwargs = exported_program.example_inputs + out_spec = exported_program.call_spec.out_spec + flat_ep = get_new_ep_with_flat_inputs_outputs(exported_program) + flat_inputs, _ = pytree.tree_flatten((args, kwargs)) + aoti_files = torch._inductor.aot_compile( - exported_program.module(), args, kwargs, options={"aot_inductor.package": True} + flat_ep.module(), tuple(flat_inputs), options={"aot_inductor.package": True} ) assert isinstance(aoti_files, list) lowered_aoti_module = LoweredBackendModule( - exported_program, backend_id, module_name=model_name + flat_ep, backend_id, module_name=model_name ) + def patched_forward(self, *args, **kwargs): # type: ignore[no-untyped-def] + flat_inputs, _ = pytree.tree_flatten((args, kwargs)) + flat_outputs = torch._higher_order_ops.executorch_call_delegate( + self, *flat_inputs + ) + if out_spec is not None and flat_outputs is not None: + return pytree.tree_unflatten(flat_outputs, out_spec) + else: + return flat_outputs + + lowered_aoti_module.forward = types.MethodType(patched_forward, lowered_aoti_module) # type: 
ignore[method-assign] + aoti_delegate_ep = torch.export.export(lowered_aoti_module, args, kwargs) return aoti_delegate_ep, aoti_files diff --git a/torch/nativert/backends/lowered_aoti_module.py b/torch/nativert/backends/lowered_aoti_module.py index b0de0e3a26d1b..b3379f3b1a94e 100644 --- a/torch/nativert/backends/lowered_aoti_module.py +++ b/torch/nativert/backends/lowered_aoti_module.py @@ -29,5 +29,5 @@ def module_name(self) -> Optional[str]: def original_module(self) -> ExportedProgram: return self._original_exported_program - def forward(self, *args): # type: ignore[no-untyped-def] - return torch._higher_order_ops.executorch_call_delegate(self, *args) + def forward(self, *args, **kwargs): # type: ignore[no-untyped-def] + return torch._higher_order_ops.executorch_call_delegate(self, *args, **kwargs) From a6f9e0e62ae25d8e125b588ca48d90c4785ad407 Mon Sep 17 00:00:00 2001 From: Tan Hoang Date: Wed, 10 Sep 2025 11:35:45 +0000 Subject: [PATCH 041/693] [c10d][nvshmem] fix override function modifier (#162515) Summary: Fix compilation error in fbsource by missing override modifier Differential Revision: D82038876 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162515 Approved by: https://github.com/Skylion007, https://github.com/kwen2501 --- torch/csrc/distributed/c10d/symm_mem/NVSHMEMSymmetricMemory.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/csrc/distributed/c10d/symm_mem/NVSHMEMSymmetricMemory.cu b/torch/csrc/distributed/c10d/symm_mem/NVSHMEMSymmetricMemory.cu index f3a63b1c2d11c..f3ba9763f2a6a 100644 --- a/torch/csrc/distributed/c10d/symm_mem/NVSHMEMSymmetricMemory.cu +++ b/torch/csrc/distributed/c10d/symm_mem/NVSHMEMSymmetricMemory.cu @@ -240,7 +240,7 @@ class NVSHMEMSymmetricMemory : public SymmetricMemory { return pai_->rank_to_global_rank_dev_; }; - bool world_within_direct_access() { + bool world_within_direct_access() override { return pai_->world_within_cuda_p2p_; } From c2388201fc85b0748173212de5a17514c7a71f21 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 10 Sep 2025 12:25:42 +0000 Subject: [PATCH 042/693] Fix decorators skipping NCCL tests (#158846) Avoid failures caused by tests exiting via sys.exit instead of `unittest.skip` In particular it will not try to start the test (causing forks into subprocess) just to stop them (killing the subprocess) which is done in the test setup Using `unittest.skip` decorators avoids the starting of the test in the first place. 
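For illustration only (not part of this patch), the sketch below contrasts the two skip styles this change moves between; the decorator names are hypothetical stand-ins for the helpers touched here, and it assumes only the standard library plus torch:

```
import sys
import unittest
from functools import wraps

import torch


def require_two_gpus_exit(func):
    # Old style: the test body is entered first and then bails out via sys.exit,
    # so any per-test setup (e.g. forked worker subprocesses) has already happened.
    @wraps(func)
    def wrapper(*args, **kwargs):
        if torch.cuda.device_count() < 2:
            sys.exit(0)
        return func(*args, **kwargs)

    return wrapper


def require_two_gpus_skip(func):
    # New style: unittest.skipUnless marks the test at collection time,
    # so the runner never invokes it (or its setup) when the condition fails.
    return unittest.skipUnless(
        torch.cuda.device_count() >= 2, "needs at least 2 GPUs"
    )(func)


class ExampleTest(unittest.TestCase):
    @require_two_gpus_skip
    def test_needs_two_gpus(self):
        self.assertGreaterEqual(torch.cuda.device_count(), 2)


if __name__ == "__main__":
    unittest.main()
```

Because the skipUnless-style decorator resolves before the test is entered, a multiprocess test case never spawns its worker processes for a skipped test, which is the failure mode described above.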
Pull Request resolved: https://github.com/pytorch/pytorch/pull/158846 Approved by: https://github.com/Skylion007 --- .../fsdp/test_fully_shard_logging.py | 6 +- test/distributed/test_functional_api.py | 25 ++---- torch/testing/_internal/common_distributed.py | 89 ++++++------------- .../_shard/sharded_tensor/__init__.py | 9 +- .../distributed/_tensor/common_dtensor.py | 7 +- .../_internal/distributed/distributed_test.py | 18 ++-- 6 files changed, 51 insertions(+), 103 deletions(-) diff --git a/test/distributed/_composable/fsdp/test_fully_shard_logging.py b/test/distributed/_composable/fsdp/test_fully_shard_logging.py index c9450a2b8f475..9b666eb55ba08 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_logging.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_logging.py @@ -1,7 +1,7 @@ # Owner(s): ["module: fsdp"] import functools import os -import unittest.mock +import unittest import torch.distributed as dist from torch._dynamo.test_case import run_tests @@ -37,9 +37,9 @@ def test_fsdp_logging(self): import torch.distributed as dist import torch.nn as nn from torch.distributed.fsdp import fully_shard -logger = logging.getLogger("torch.distributed._composable.fsdp") +logger = logging.getLogger("torch.distributed.fsdp.fully_shard") logger.setLevel(logging.DEBUG) -device = {device_type.type} +device = '{device_type.type}' torch.manual_seed(0) model = nn.Sequential(*[nn.Linear(4, 4, device=device, bias=False) for _ in range(2)]) for layer in model: diff --git a/test/distributed/test_functional_api.py b/test/distributed/test_functional_api.py index b5522fe2bef06..a21eb0dbf4447 100644 --- a/test/distributed/test_functional_api.py +++ b/test/distributed/test_functional_api.py @@ -13,6 +13,7 @@ from torch._inductor.utils import run_and_get_code from torch.testing import FileCheck from torch.testing._internal.common_device_type import instantiate_device_type_tests +from torch.testing._internal.common_distributed import exit_if_lt_x_accelerators from torch.testing._internal.inductor_utils import HAS_GPU @@ -24,7 +25,7 @@ DistributedTestBase, MultiThreadedTestCase, requires_accelerator_dist_backend, - TEST_SKIPS, + skip_if_no_gpu, ) from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, @@ -479,25 +480,14 @@ def allred_mesh_dim(input): BACKEND = dist.Backend.XCCL -# allows you to check for multiple accelerator irrespective of device type -# to add new device types to this check simply follow the same format -# and append an elif with the conditional and appropriate device count function for your new device -def exit_if_lt_x_accelerators(x): - if torch.accelerator.is_available(): - if torch.accelerator.device_count() < x: - sys.exit(TEST_SKIPS[f"multi-accelerator-{x}"].exit_code) - - def with_comms(func=None): if func is None: return partial(with_comms) @wraps(func) def wrapper(self, *args, **kwargs): - if ( - BACKEND == dist.Backend.NCCL or BACKEND == dist.Backend.XCCL - ) and torch.accelerator.device_count() < self.world_size: - sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code) + if BACKEND in (dist.Backend.NCCL, dist.Backend.XCCL): + exit_if_lt_x_accelerators(self.world_size) kwargs["device"] = DEVICE self.pg = self.create_pg(device=DEVICE) @@ -510,9 +500,9 @@ def wrapper(self, *args, **kwargs): class TestCollectivesWithDistributedBackend(DistributedTestBase): + @skip_if_no_gpu @with_comms() def test_all_gather_into_tensor_coalesced(self, device): - exit_if_lt_x_accelerators(self.world_size) tensors = [ torch.ones([4], device=device), 
torch.ones([4], device=device) + 1, @@ -584,9 +574,8 @@ def allreduce(t, pg): compiled_allreduce(torch.randn(8, device=device), self.pg) @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") + @skip_if_no_gpu def test_tracing_with_fakepg(self, device=DEVICE): - exit_if_lt_x_accelerators(self.world_size) - def allreduce(t, pg): return ft_c.all_reduce(t, "sum", pg) @@ -627,9 +616,9 @@ class TestDistributedBackendCollectivesWithWorldSize4( def world_size(self): return 4 + @skip_if_no_gpu @with_comms() def test_permute_tensor_with_sub_group(self, device): - exit_if_lt_x_accelerators(self.world_size) mesh_dim_names = ["dp", "tp"] mesh_2d = dt.init_device_mesh( diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py index c1f75697fe889..d9d07dddea3d8 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py @@ -118,14 +118,26 @@ def requires_ddp_rank(device): return device in DDP_RANK_DEVICES +def exit_if_lt_x_cuda_devs(x): + """Exit process unless at least the given number of CUDA devices are available""" + if torch.cuda.device_count() < x: + sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) + + +# allows you to check for multiple accelerator irrespective of device type +# to add new device types to this check simply follow the same format +# and append an elif with the conditional and appropriate device count function for your new device +def exit_if_lt_x_accelerators(x): + if torch.accelerator.device_count() < x: + sys.exit(TEST_SKIPS[f"multi-accelerator-{x}"].exit_code) + + def skip_if_no_gpu(func): """Skips if the world size exceeds the number of GPUs, ensuring that if the test is run, each rank has its own GPU via ``torch.cuda.device(rank)``.""" @wraps(func) def wrapper(*args, **kwargs): - if not (TEST_CUDA or TEST_HPU or TEST_XPU): - sys.exit(TEST_SKIPS["no_cuda"].exit_code) world_size = int(os.environ["WORLD_SIZE"]) if TEST_CUDA and torch.cuda.device_count() < world_size: sys.exit(TEST_SKIPS[f"multi-gpu-{world_size}"].exit_code) @@ -136,7 +148,9 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) - return wrapper + return unittest.skipUnless( + TEST_CUDA or TEST_HPU or TEST_XPU, TEST_SKIPS["no_cuda"].message + )(wrapper) # TODO (kwen2501): what is the purpose of this decorator? 
Tests with this @@ -168,33 +182,16 @@ def wrapper(*args, **kwargs): def require_n_gpus_for_nccl_backend(n, backend): - def decorator(func): - @wraps(func) - def wrapper(*args, **kwargs): - if backend == "nccl" and torch.cuda.device_count() < n: - sys.exit(TEST_SKIPS[f"multi-gpu-{n}"].exit_code) - else: - return func(*args, **kwargs) - - return wrapper - - return decorator + return skip_if_lt_x_gpu(n) if backend == "nccl" else unittest.skipIf(False, None) def import_transformers_or_skip(): - def decorator(func): - @wraps(func) - def wrapper(*args, **kwargs): - try: - from transformers import AutoModelForMaskedLM, BertConfig # noqa: F401 - - return func(*args, **kwargs) - except ImportError: - sys.exit(TEST_SKIPS["importerror"].exit_code) - - return wrapper + try: + from transformers import AutoModelForMaskedLM, BertConfig # noqa: F401 - return decorator + return unittest.skipIf(False) + except ImportError: + return unittest.skip(TEST_SKIPS["importerror"].message) def at_least_x_gpu(x): @@ -208,36 +205,7 @@ def at_least_x_gpu(x): def skip_if_lt_x_gpu(x): - def decorator(func): - @wraps(func) - def wrapper(*args, **kwargs): - if torch.cuda.is_available() and torch.cuda.device_count() >= x: - return func(*args, **kwargs) - if TEST_HPU and torch.hpu.device_count() >= x: - return func(*args, **kwargs) - if TEST_XPU and torch.xpu.device_count() >= x: - return func(*args, **kwargs) - sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) - - return wrapper - - return decorator - - -# This decorator helps avoiding initializing cuda while testing other backends -def nccl_skip_if_lt_x_gpu(backend, x): - def decorator(func): - @wraps(func) - def wrapper(*args, **kwargs): - if backend != "nccl": - return func(*args, **kwargs) - if torch.cuda.is_available() and torch.cuda.device_count() >= x: - return func(*args, **kwargs) - sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) - - return wrapper - - return decorator + return unittest.skipUnless(at_least_x_gpu(x), TEST_SKIPS[f"multi-gpu-{x}"].message) def verify_ddp_error_logged(model_DDP, err_substr): @@ -424,14 +392,7 @@ def requires_multicast_support(): def skip_if_rocm_multiprocess(func): """Skips a test for ROCm""" func.skip_if_rocm_multiprocess = True - - @wraps(func) - def wrapper(*args, **kwargs): - if not TEST_WITH_ROCM: - return func(*args, **kwargs) - sys.exit(TEST_SKIPS["skipIfRocm"].exit_code) - - return wrapper + return unittest.skipUnless(TEST_WITH_ROCM, TEST_SKIPS["skipIfRocm"].message)(func) def skip_if_win32(): diff --git a/torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py b/torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py index 60c744ac1a84c..a0a38837c14b2 100644 --- a/torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py +++ b/torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py @@ -7,8 +7,9 @@ import torch.distributed as dist from torch.distributed import rpc from torch.testing._internal.common_distributed import ( + exit_if_lt_x_cuda_devs, MultiProcessTestCase, - TEST_SKIPS, + require_n_gpus_for_nccl_backend, tp_transports, ) @@ -94,10 +95,10 @@ def with_comms(func=None, init_rpc=True, backend="nccl"): @wraps(func) def wrapper(self, *args, **kwargs): - if backend == "nccl" and torch.cuda.device_count() < self.world_size: - sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code) + if backend == "nccl": + exit_if_lt_x_cuda_devs(self.world_size) self.init_comms(init_rpc=init_rpc, backend=backend) func(self, *args, **kwargs) self.destroy_comms(destroy_rpc=init_rpc) - 
return wrapper + return require_n_gpus_for_nccl_backend(1, backend)(wrapper) diff --git a/torch/testing/_internal/distributed/_tensor/common_dtensor.py b/torch/testing/_internal/distributed/_tensor/common_dtensor.py index e25e08fbf5090..9758fa5d1e7d3 100644 --- a/torch/testing/_internal/distributed/_tensor/common_dtensor.py +++ b/torch/testing/_internal/distributed/_tensor/common_dtensor.py @@ -3,7 +3,6 @@ # Copyright (c) Meta Platforms, Inc. and affiliates import itertools -import sys from collections.abc import Iterator, Sequence from dataclasses import dataclass from functools import partial, wraps @@ -31,12 +30,12 @@ SequenceParallel, ) from torch.testing._internal.common_distributed import ( + exit_if_lt_x_cuda_devs, MultiProcContinuousTest, MultiProcessTestCase, MultiThreadedTestCase, run_subtests, skip_if_lt_x_gpu, - TEST_SKIPS, ) from torch.testing._internal.common_utils import TEST_CUDA, TEST_HPU, TEST_XPU from torch.utils._pytree import tree_flatten, tree_unflatten, TreeSpec @@ -374,8 +373,8 @@ def build_device_mesh(self) -> DeviceMesh: return init_device_mesh(self.device_type, (self.world_size,)) def init_pg(self, eager_init, backend: Optional[str] = None) -> None: - if "nccl" in self.backend and torch.cuda.device_count() < self.world_size: - sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code) + if "nccl" in self.backend: + exit_if_lt_x_cuda_devs(self.world_size) if backend is None: backend = self.backend diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py index 024fd47285ae8..21d51b66ad03a 100644 --- a/torch/testing/_internal/distributed/distributed_test.py +++ b/torch/testing/_internal/distributed/distributed_test.py @@ -59,10 +59,10 @@ captured_output, cleanup_temp_dir, DistTestCases, + exit_if_lt_x_cuda_devs, init_multigpu_helper, initialize_temp_directories, MultiProcessTestCase, - nccl_skip_if_lt_x_gpu, require_n_gpus_for_nccl_backend, requires_nccl_version, simple_sparse_reduce_tests, @@ -609,10 +609,8 @@ def _run(cls, rank, test_name, file_name, pipe, **kwargs): self.rank = rank self.file_name = file_name - if torch.cuda.is_available() and torch.cuda.device_count() < int( - self.world_size - ): - sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code) + if torch.cuda.is_available(): + exit_if_lt_x_cuda_devs(int(self.world_size)) try: pg_timeout_seconds = CUSTOM_PG_TIMEOUT.get(test_name, default_pg_timeout) timeout = timedelta(seconds=pg_timeout_seconds) @@ -5344,7 +5342,7 @@ def step_model(model, input, target): BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @nccl_skip_if_lt_x_gpu(BACKEND, 2) + @require_n_gpus_for_nccl_backend(2, BACKEND) def test_accumulate_gradients_no_sync(self): """ Runs _test_accumulate_gradients_no_sync using default inputs @@ -5355,7 +5353,7 @@ def test_accumulate_gradients_no_sync(self): BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @nccl_skip_if_lt_x_gpu(BACKEND, 2) + @require_n_gpus_for_nccl_backend(2, BACKEND) def test_accumulate_gradients_no_sync_grad_is_view(self): """ Runs _test_accumulate_gradients_no_sync using default inputs @@ -5366,7 +5364,7 @@ def test_accumulate_gradients_no_sync_grad_is_view(self): BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @nccl_skip_if_lt_x_gpu(BACKEND, 2) + @require_n_gpus_for_nccl_backend(2, BACKEND) 
def test_accumulate_gradients_no_sync_allreduce_hook(self): """ Runs multiple iterations on _test_accumulate_gradients_no_sync @@ -5394,7 +5392,7 @@ def allreduce_hook( BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @nccl_skip_if_lt_x_gpu(BACKEND, 2) + @require_n_gpus_for_nccl_backend(2, BACKEND) def test_accumulate_gradients_no_sync_allreduce_with_then_hook(self): """ Runs multiple iterations on _test_accumulate_gradients_no_sync using allreduce @@ -5428,7 +5426,7 @@ def div(fut): BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @nccl_skip_if_lt_x_gpu(BACKEND, 2) + @require_n_gpus_for_nccl_backend(2, BACKEND) def test_get_future(self): def mult(fut): return [t * 3 for t in fut.wait()] From fc1b09a52ab995559b7825240031ad66c1ba0166 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 10 Sep 2025 14:12:22 +0000 Subject: [PATCH 043/693] Revert "Fix DCE eliminating in-place operations by improving Node.is_impure() (#162267)" This reverts commit b9a7d0e13b4a34be83c778734dbad437c7c5117b. Reverted https://github.com/pytorch/pytorch/pull/162267 on behalf of https://github.com/malfet due to Not sure how it happened, but looks like it broke everything, see https://hud.pytorch.org/hud/pytorch/pytorch/c2388201fc85b0748173212de5a17514c7a71f21/1?per_page=50 ([comment](https://github.com/pytorch/pytorch/pull/162267#issuecomment-3275164109)) --- test/fx/test_dce_pass.py | 88 +--------------------------------------- torch/fx/node.py | 28 ------------- 2 files changed, 2 insertions(+), 114 deletions(-) diff --git a/test/fx/test_dce_pass.py b/test/fx/test_dce_pass.py index 5c0230e14c432..7fd3a6dbb0041 100644 --- a/test/fx/test_dce_pass.py +++ b/test/fx/test_dce_pass.py @@ -1,6 +1,5 @@ # Owner(s): ["module: fx"] import copy -import inspect import unittest from typing import Optional @@ -39,39 +38,12 @@ def _get_num_placeholders(self, m: torch.fx.GraphModule) -> int: count += 1 return count - @torch.compiler.disable - def _trace_with_dynamo(self, m: torch.nn.Module) -> torch.fx.GraphModule: - """Dynamo will keep in-place operations, whereas torch.fx.Tracer will remove them.""" - graph_module: torch.fx.GraphModule | None = None - - def _backend(gm: torch.fx.GraphModule, _): - nonlocal graph_module - graph_module = gm - return gm - - inputs = [ - torch.tensor([1.5]) - for _ in range(len(inspect.signature(m.forward).parameters)) - ] - torch.compile( - m, - backend=_backend, - fullgraph=True, - )(*inputs) - assert graph_module is not None - - # TorchDynamo returns a graph with flattened output; unflatten here for the test - graph_module.graph.output_node().args = graph_module.graph.output_node().args[0] - graph_module.recompile() - return graph_module - def _run_dce_and_test( self, m: torch.nn.Module, expect_dce_changes: bool, modules_to_be_leafs: Optional[set[type]] = None, custom: bool = False, - use_dynamo_for_tracing: bool = False, ): class TestTracer(torch.fx.Tracer): def is_leaf_module(self, m, qualname): @@ -79,12 +51,7 @@ def is_leaf_module(self, m, qualname): return True return super().trace(m, qualname) - if use_dynamo_for_tracing: - traced = self._trace_with_dynamo(m) - else: - traced: torch.fx.GraphModule = torch.fx.GraphModule( - m, TestTracer().trace(m) - ) + traced: torch.fx.GraphModule = torch.fx.GraphModule(m, TestTracer().trace(m)) print(str(traced.graph)) # Verify there are nodes without users (if expected). 
@@ -113,7 +80,7 @@ def is_leaf_module(self, m, qualname): traced.recompile() # Make sure we run and get the same results before/after DCE. - inputs = [torch.tensor([1.5]) for _ in range(new_num_phs)] + inputs = [torch.tensor([1.5])] * new_num_phs inputs_copy = copy.deepcopy(inputs) self.assertTrue(torch.equal(m(*inputs), traced(*inputs_copy))) @@ -215,57 +182,6 @@ def forward(self, a: torch.Tensor) -> torch.Tensor: TestModule(), expect_dce_changes=False, modules_to_be_leafs={ReLUImpure} ) - def test_keep_inplace_with_side_effects(self): - """ - Test that DCE doesn't remove an inplace operation. - """ - - class TestModule(torch.nn.Module): - def forward(self, x: torch.Tensor) -> torch.Tensor: - x.add_(2) - y = 2 * x - x.add_(y) - return y - - self._run_dce_and_test(TestModule(), expect_dce_changes=False) - - def test_keep_inplace_python_operator_with_side_effects(self): - """ - Test that DCE doesn't remove an inplace operation. - """ - - class TestModule(torch.nn.Module): - def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: - x += y - x //= y - x %= y - x *= y - x -= y - x /= y - x @= y - - x = x.reshape_as(y) - concat_a = [x] - concat_b = [y] - concat_a += concat_b - - a = x.to(dtype=torch.long) - b = y.to(dtype=torch.long) - - a //= b - a <<= b - a %= b - a |= b - a **= b - a >>= b - a ^= b - - return x + y + concat_a[0] + a + b - - self._run_dce_and_test( - TestModule(), expect_dce_changes=False, use_dynamo_for_tracing=True - ) - def test_keep_torch_assert(self): """ Test that DCE doesn't remove torch._assert since it has side effects. diff --git a/torch/fx/node.py b/torch/fx/node.py index 19eb9a0e46aa4..dbd6ed93ef26c 100644 --- a/torch/fx/node.py +++ b/torch/fx/node.py @@ -84,23 +84,6 @@ torch.amp._exit_autocast, ] -_side_effect_inplace: set[Callable[..., Any]] = { - operator.iadd, - operator.iand, - operator.iconcat, - operator.ifloordiv, - operator.ilshift, - operator.imod, - operator.imul, - operator.imatmul, - operator.ior, - operator.ipow, - operator.irshift, - operator.isub, - operator.itruediv, - operator.ixor, -} - # TODO: Either refactor this into 2 functions 1 dce for functional graphs and 1 dce for all graphs, # or add logic to correctly mark all inplace ops as side effectful. 
_side_effectful_functions: set[Callable[..., Any]] = { @@ -116,7 +99,6 @@ _ops.profiler._record_function_exit, _ops.inductor.accumulate_grad_.default, operator.setitem, - *_side_effect_inplace, *_side_effectful_need_to_be_preserved_pre_dispatch, } @@ -831,16 +813,6 @@ def is_impure(self, impure_random: bool = True) -> bool: ) return getattr(target_mod, "_is_impure", False) - if self.op == "call_method": - target_name = ( - self.target - if isinstance(self.target, str) - else torch.typename(self.target) - ) - # Check for functions with names ending in an underscore (e.g., 'add_') that are inplace in torch - if target_name.endswith("_"): - return True - return False @compatibility(is_backward_compatible=False) From de05dbc39c0960348a1df91ea614879aee81cc18 Mon Sep 17 00:00:00 2001 From: "Tugsbayasgalan (Tugsuu) Manlaibaatar" Date: Wed, 10 Sep 2025 14:19:34 +0000 Subject: [PATCH 044/693] Replace export_for_training with export (#162396) Summary: replace export_for_training with epxort Test Plan: CI Rollback Plan: Differential Revision: D81935792 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162396 Approved by: https://github.com/angelayi, https://github.com/jerryzh168 --- .../tensor/test_dtensor_compile.py | 2 +- test/export/test_db.py | 8 +- test/export/test_experimental.py | 10 +-- test/export/test_export.py | 80 +++++++++---------- .../test_export_training_ir_to_run_decomp.py | 6 +- test/export/test_serialize.py | 70 ++++++++-------- test/export/test_torchbind.py | 16 ++-- test/export/test_unflatten_training_ir.py | 4 +- test/export/test_verifier.py | 24 +++--- test/fx/test_matcher_utils.py | 14 ++-- test/inductor/test_aot_inductor.py | 6 +- test/quantization/pt2e/test_duplicate_dq.py | 4 +- .../pt2e/test_metadata_porting.py | 2 +- .../pt2e/test_numeric_debugger.py | 24 +++--- test/quantization/pt2e/test_quantize_pt2e.py | 44 +++++----- .../pt2e/test_quantize_pt2e_qat.py | 26 +++--- test/quantization/pt2e/test_representation.py | 4 +- .../pt2e/test_x86inductor_quantizer.py | 6 +- .../pt2e/test_xnnpack_quantizer.py | 16 ++-- test/test_model_exports_to_core_aten.py | 4 +- torch/ao/quantization/pt2e/lowering.py | 2 +- torch/ao/quantization/pt2e/utils.py | 2 +- torch/distributed/pipelining/_IR.py | 4 +- .../testing/_internal/common_quantization.py | 17 ++-- 24 files changed, 180 insertions(+), 215 deletions(-) diff --git a/test/distributed/tensor/test_dtensor_compile.py b/test/distributed/tensor/test_dtensor_compile.py index 15e3daf6b9413..fa4c217716b2c 100644 --- a/test/distributed/tensor/test_dtensor_compile.py +++ b/test/distributed/tensor/test_dtensor_compile.py @@ -183,7 +183,7 @@ def forward(self, x): ) torch.utils._pytree.register_constant(DeviceMesh) - ep = torch.export.export_for_training( + ep = torch.export.export( Foo(), (torch.randn(4, 4, dtype=torch.float64),), strict=False ) self.assertExpectedInline( diff --git a/test/export/test_db.py b/test/export/test_db.py index a035bdd239167..e783b3152abe2 100644 --- a/test/export/test_db.py +++ b/test/export/test_db.py @@ -9,7 +9,7 @@ filter_examples_by_support_level, get_rewrite_cases, ) -from torch.export import export_for_training +from torch.export import export from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, IS_WINDOWS, @@ -35,7 +35,7 @@ def test_exportdb_supported(self, name: str, case: ExportCase) -> None: kwargs_export = case.example_kwargs args_model = copy.deepcopy(args_export) kwargs_model = copy.deepcopy(kwargs_export) - exported_program = export_for_training( + 
exported_program = export( model, args_export, kwargs_export, @@ -68,7 +68,7 @@ def test_exportdb_not_supported(self, name: str, case: ExportCase) -> None: with self.assertRaises( (torchdynamo.exc.Unsupported, AssertionError, RuntimeError) ): - export_for_training( + export( model, case.example_args, case.example_kwargs, @@ -94,7 +94,7 @@ def test_exportdb_not_supported_rewrite( self, name: str, rewrite_case: ExportCase ) -> None: # pyre-ignore - export_for_training( + export( rewrite_case.model, rewrite_case.example_args, rewrite_case.example_kwargs, diff --git a/test/export/test_experimental.py b/test/export/test_experimental.py index 871dc813a687f..f47e787599f6e 100644 --- a/test/export/test_experimental.py +++ b/test/export/test_experimental.py @@ -9,7 +9,7 @@ import torch._dynamo from torch._dynamo.test_case import run_tests, TestCase from torch._functorch.aot_autograd import aot_export_module -from torch.export import export, export_for_training +from torch.export import export from torch.export.experimental import _export_forward_backward, _sticky_export from torch.export.graph_signature import OutputKind from torch.testing import FileCheck @@ -32,7 +32,7 @@ def forward(self, x): m = Module() example_inputs = (torch.randn(3),) m(*example_inputs) - ep = torch.export.export_for_training(m, example_inputs, strict=True) + ep = torch.export.export(m, example_inputs, strict=True) joint_ep = _export_forward_backward(ep) self.assertExpectedInline( str(joint_ep.graph_module.code).strip(), @@ -141,7 +141,7 @@ def forward(self, x): m = Module() example_inputs = (torch.randn(3),) m(*example_inputs) - ep = torch.export.export_for_training( + ep = torch.export.export( m, example_inputs, dynamic_shapes={"x": {0: Dim("x0")}}, strict=True ) _export_forward_backward(ep) @@ -177,7 +177,7 @@ def forward(self, x, labels): labels = torch.ones(4, dtype=torch.int64) inputs = (x, labels) - ep = export_for_training(net, inputs, strict=True) + ep = export(net, inputs, strict=True) ep = _export_forward_backward(ep) def test_joint_loss_index(self): @@ -197,7 +197,7 @@ def forward(self, x): inputs = (torch.randn(4, 4),) for i in [0, 1]: - ep = export_for_training(Foo(i), inputs, strict=True) + ep = export(Foo(i), inputs, strict=True) ep_joint = _export_forward_backward(ep, joint_loss_index=i) for j, spec in enumerate(ep_joint.graph_signature.output_specs): if i == j: diff --git a/test/export/test_export.py b/test/export/test_export.py index 4b3f97345d06e..ed19fbe2bec81 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -42,13 +42,7 @@ from torch._higher_order_ops.while_loop import while_loop from torch._inductor.compile_fx import split_const_gm from torch._subclasses import FakeTensorMode -from torch.export import ( - default_decompositions, - Dim, - export, - export_for_training, - unflatten, -) +from torch.export import default_decompositions, Dim, export, unflatten from torch.export._trace import ( _export, _export_to_torch_ir, @@ -1058,7 +1052,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: args = (torch.randn(15, 3, 256, 256), torch.ones(15, 32, 256, 256)) self.assertEqual(exported_program.module()(*args), m(*args)) - gm: torch.fx.GraphModule = torch.export.export_for_training( + gm: torch.fx.GraphModule = torch.export.export( m, args=example_args, dynamic_shapes=dynamic_shapes ).module() @@ -2456,7 +2450,7 @@ def forward(self, x): ref_x = torch.randn(3, 4) ref_out = m(ref_x) - ep_training = torch.export.export_for_training(m, (ref_x,)) + ep_training = 
torch.export.export(m, (ref_x,)) self.assertExpectedInline( str(ep_training.graph).strip(), """\ @@ -2519,7 +2513,7 @@ def forward(self, x): ref_x = torch.randn(2, 2) ref_out = m(ref_x) - ep_training = torch.export.export_for_training(m, (ref_x,), strict=False) + ep_training = torch.export.export(m, (ref_x,), strict=False) self.assertExpectedInline( str(ep_training.graph).strip(), """\ @@ -2651,7 +2645,7 @@ def forward(self, x): m = Foo() ref_x = torch.randn(3, 4) ref_out = m(ref_x) - ep_training = torch.export.export_for_training(m, (ref_x,), strict=False) + ep_training = torch.export.export(m, (ref_x,), strict=False) self.assertTrue(torch.allclose(ep_training.module()(ref_x), ref_out)) self.assertExpectedInline( str(ep_training.graph).strip(), @@ -2706,7 +2700,7 @@ def forward(self, x): m = Foo() ref_x = torch.randn(3, 4) ref_out = m(ref_x) - ep_training = torch.export.export_for_training(m, (ref_x,), strict=False) + ep_training = torch.export.export(m, (ref_x,), strict=False) self.assertExpectedInline( str(ep_training.graph).strip(), """\ @@ -2746,7 +2740,7 @@ def forward(self, x): m = Foo() ref_x = torch.randn(3, 4) ref_out = m(ref_x) - ep_training = torch.export.export_for_training(m, (ref_x,), strict=False) + ep_training = torch.export.export(m, (ref_x,), strict=False) self.assertExpectedInline( str(ep_training.graph).strip(), """\ @@ -2784,7 +2778,7 @@ def forward(self, x): m = Foo() ref_x = torch.randn(3, 4) ref_out = m(ref_x) - ep_training = torch.export.export_for_training(m, (ref_x,), strict=False) + ep_training = torch.export.export(m, (ref_x,), strict=False) self.assertExpectedInline( str(ep_training.graph).strip(), """\ @@ -2823,7 +2817,7 @@ def forward(self, x): m = Foo() ref_x = torch.randn(3, 4) ref_out = m(ref_x) - ep_training = torch.export.export_for_training(m, (ref_x,), strict=False) + ep_training = torch.export.export(m, (ref_x,), strict=False) self.assertExpectedInline( str(ep_training.graph).strip(), """\ @@ -2863,7 +2857,7 @@ def forward(self, x): m = Foo() ref_x = torch.randn(3, 4) ref_out = m(ref_x) - ep_training = torch.export.export_for_training(m, (ref_x,), strict=False) + ep_training = torch.export.export(m, (ref_x,), strict=False) self.assertExpectedInline( str(ep_training.graph).strip(), """\ @@ -3983,7 +3977,7 @@ def forward(self, x, y): x_linear = self.linear(x_conv) return x_linear.cos() + y_conv_1d.sum() - ep = torch.export.export_for_training( + ep = torch.export.export( Foo(), (torch.randn(20, 16, 50, 100), torch.randn(20, 16, 50)) ) @@ -4251,9 +4245,7 @@ def forward(self, x): return self.linear(x) eager_model = Foo() - ep_for_training = torch.export.export_for_training( - eager_model, (torch.ones(2, 2),) - ) + ep_for_training = torch.export.export(eager_model, (torch.ones(2, 2),)) self.assertExpectedInline( str(ep_for_training.graph_module.code).strip(), """\ @@ -4291,7 +4283,7 @@ def forward(self, x): eager_model_for_export = Foo() eager_model_for_testing = Foo() - ep_for_training = torch.export.export_for_training( + ep_for_training = torch.export.export( eager_model_for_export, (torch.ones(4, 4),) ) self.assertExpectedInline( @@ -4337,7 +4329,7 @@ def forward(self, x): eager_model_for_export_training = Foo() eager_model_for_export_inference = Foo() eager_model_for_testing = Foo() - ep_for_training = torch.export.export_for_training( + ep_for_training = torch.export.export( eager_model_for_export_training, (torch.ones(4, 4),), dynamic_shapes=({0: Dim("x")},), @@ -4391,7 +4383,7 @@ def forward(self, container): return x + y + self.buffer.sum() 
eager_model = Foo() - ep_for_training = torch.export.export_for_training( + ep_for_training = torch.export.export( eager_model, ([torch.ones(4, 4), torch.ones(4, 4)],), ) @@ -4597,7 +4589,7 @@ def forward(self, x): return self.linear(x) + self.buffer.sum() eager_model = Foo() - ep_for_training = torch.export.export_for_training( + ep_for_training = torch.export.export( eager_model, (torch.ones(2, 2),), ) @@ -7530,7 +7522,7 @@ def forward(self, x): inp = torch.randn(4, 4) - ep = export_for_training( + ep = torch.export.export( Foo(), (inp,), strict=False, preserve_module_call_signature=("bar",) ) unflat = unflatten(ep).bar @@ -7836,7 +7828,7 @@ def forward(self, x): decomp_table = {**default_decompositions(), **decomposition_table} - ep = export_for_training(M(), (torch.randn(2, 2),)).run_decompositions( + ep = torch.export.export(M(), (torch.randn(2, 2),)).run_decompositions( decomp_table ) @@ -7865,7 +7857,7 @@ def forward(self, x): mod.eval() inp = torch.randn(1, 1, 3, 3) - gm = torch.export.export_for_training(mod, (inp,)).module() + gm = torch.export.export(mod, (inp,)).module() self.assertExpectedInline( str(gm.code).strip(), """\ @@ -7885,7 +7877,7 @@ def forward(self, x): ) mod.train() - gm_train = torch.export.export_for_training(mod, (inp,)).module() + gm_train = torch.export.export(mod, (inp,)).module() self.assertExpectedInline( str(gm_train.code).strip(), """\ @@ -8450,7 +8442,7 @@ def forward(self, x, m): ref_x = torch.randn(2, 2) ref_out = f(ref_x, mod) - ep = torch.export.export_for_training(f, (torch.randn(2, 2), mod), strict=False) + ep = torch.export.export(f, (torch.randn(2, 2), mod), strict=False) self.assertEqual(ref_out, ep.module()(ref_x, mod)) def test_unbacked_noncontig_lin(self): @@ -9645,7 +9637,7 @@ def forward(self, x): return m(x) * x inps = (torch.randn(3, 3),) - ep = export_for_training(M2(), inps).run_decompositions({}) + ep = torch.export.export(M2(), inps).run_decompositions({}) self.assertTrue(torch.allclose(ep.module()(*inps), M2()(*inps))) self.assertEqual(len(ep.state_dict), 0) @@ -9682,7 +9674,7 @@ def forward(self, x): inps = (torch.randn(3, 3),) # Strict export segfaults (Issue #128109) - ep = export_for_training(M2(), inps, strict=False).run_decompositions({}) + ep = torch.export.export(M2(), inps, strict=False).run_decompositions({}) self.assertTrue(torch.allclose(ep.module()(*inps), M2()(*inps))) self.assertEqual(len(ep.state_dict), 0) @@ -12013,7 +12005,7 @@ def test(ep): if is_training_ir_test(self._testMethodName): test( - torch.export.export_for_training( + torch.export.export( M(), inp, strict=not is_non_strict_test(self._testMethodName), @@ -12134,7 +12126,7 @@ def test(ep, swap=None): test(export(M(), inp)) strict = not is_non_strict_test(self._testMethodName) - ept = torch.export.export_for_training( + ept = torch.export.export( M(), inp, strict=strict, @@ -12209,7 +12201,7 @@ def forward(self, x): x = torch.zeros((4, 4, 10)) - ep_training = torch.export.export_for_training(model, (x,), strict=False) + ep_training = torch.export.export(model, (x,), strict=False) state_dict_before = ep_training.state_dict ep = export(model, (x,), strict=False).run_decompositions() @@ -12253,7 +12245,7 @@ def forward(self, x): x = torch.zeros((4, 4, 10)) - ep_training = torch.export.export_for_training(model, (x,), strict=False) + ep_training = torch.export.export(model, (x,), strict=False) state_dict_before = ep_training.state_dict ep = export(model, (x,), strict=False).run_decompositions() @@ -12772,7 +12764,7 @@ def true_fn(x, y): model = Model() with 
torch.no_grad(): - exported_program = torch.export.export_for_training( + exported_program = torch.export.export( model, (torch.tensor(10), torch.tensor(12)), {}, @@ -12868,7 +12860,7 @@ def forward(self, x, y): # no grad model = Model() with torch.no_grad(): - ep_nograd = torch.export.export_for_training( + ep_nograd = torch.export.export( model, (torch.tensor(10), torch.tensor(12)), {}, @@ -12888,7 +12880,7 @@ def forward(self, x, y): # enable grad model = Model() - ep_grad = torch.export.export_for_training( + ep_grad = torch.export.export( model, (torch.tensor(10), torch.tensor(12)), {}, @@ -13011,7 +13003,7 @@ def forward(self, x): "torch.ops.higher_order.wrap_with_set_grad_enabled", ep.graph_module.code, ) - gm = torch.export.export_for_training(model, (torch.randn(4, 4),)).module() + gm = torch.export.export(model, (torch.randn(4, 4),)).module() self.assertIn( "set_grad_enabled", gm.code, @@ -13040,7 +13032,7 @@ def forward(self, x): ) # _export_for_traininig is using pre_dispatch=False # Therefore the autocast calls are not replaced with a hop. - gm = torch.export.export_for_training(model, (torch.randn(4, 4),)).module() + gm = torch.export.export(model, (torch.randn(4, 4),)).module() self.assertIn( "autocast", gm.code, @@ -13287,7 +13279,7 @@ def forward(self, x): inps = (torch.ones(5),) - ep = export_for_training(M(), inps).run_decompositions({}) + ep = torch.export.export(M(), inps).run_decompositions({}) self.assertExpectedInline( str(ep.graph_module.code.strip()), """\ @@ -13608,7 +13600,7 @@ def forward(self, x, y): return y + y_sum + unbacked_shape.sum() inps = (torch.tensor(4), torch.randn(5, 5)) - ep_pre = torch.export.export_for_training(Foo(), inps, strict=False) + ep_pre = torch.export.export(Foo(), inps, strict=False) self.assertExpectedInline( str(ep_pre.graph_module.submod_1.code).strip(), """\ @@ -14298,7 +14290,7 @@ def forward(self, x): return val.b.a mod = Foo() - ep = export_for_training(mod, (torch.randn(4, 4),), strict=False) + ep = torch.export.export(mod, (torch.randn(4, 4),), strict=False) self.assertExpectedInline( str(ep.graph).strip(), """\ @@ -15311,7 +15303,7 @@ def outer_body_fn(x, y): x = torch.randn(2, 4) y = torch.ones(4) - ep_for_training = torch.export.export_for_training(M(), (x, y), strict=strict) + ep_for_training = torch.export.export(M(), (x, y), strict=strict) self.assertExpectedInline( normalize_gm( ep_for_training.graph_module.print_readable(print_output=False) diff --git a/test/export/test_export_training_ir_to_run_decomp.py b/test/export/test_export_training_ir_to_run_decomp.py index 1f5b7c952701a..6781e6d3c7d75 100644 --- a/test/export/test_export_training_ir_to_run_decomp.py +++ b/test/export/test_export_training_ir_to_run_decomp.py @@ -15,14 +15,14 @@ def mocked_training_ir_to_run_decomp_export_strict(*args, **kwargs): if "strict" in kwargs: - ep = torch.export.export_for_training(*args, **kwargs) + ep = torch.export.export(*args, **kwargs) else: - ep = torch.export.export_for_training(*args, **kwargs, strict=True) + ep = torch.export.export(*args, **kwargs, strict=True) return ep.run_decompositions({}) def mocked_training_ir_to_run_decomp_export_non_strict(*args, **kwargs): - ep = torch.export.export_for_training(*args, **kwargs) + ep = torch.export.export(*args, **kwargs) return ep.run_decompositions({}) diff --git a/test/export/test_serialize.py b/test/export/test_serialize.py index f96fc4ade4377..44ecb13ab5724 100644 --- a/test/export/test_serialize.py +++ b/test/export/test_serialize.py @@ -45,7 +45,7 @@ ) from 
torch._higher_order_ops.torchbind import enable_torchbind_tracing from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode -from torch.export import Dim, export_for_training, load, save, unflatten +from torch.export import Dim, export, load, save, unflatten from torch.export.pt2_archive.constants import ARCHIVE_VERSION_PATH from torch.fx.experimental.symbolic_shapes import is_concrete_int, ValueRanges from torch.testing._internal.common_utils import ( @@ -115,7 +115,7 @@ def op_schema(cls, op): return torch.ops.aten.add.Tensor._schema inp = (torch.ones(10),) - ep = export_for_training(TestModule(), inp, strict=True) + ep = export(TestModule(), inp, strict=True) # Register the custom op handler. foo_custom_op = FooExtensionOp() @@ -180,9 +180,7 @@ def forward(self, x, y, use_p=False): model = MyModule().eval() random_inputs = (torch.rand([2, 3]), torch.rand([2, 3])) - exp_program = export_for_training( - model, random_inputs, {"use_p": True}, strict=True - ) + exp_program = export(model, random_inputs, {"use_p": True}, strict=True) output_buffer = io.BytesIO() # Tests that example inputs are preserved when saving and loading module. @@ -201,7 +199,7 @@ class M(torch.nn.Module): def forward(self, x): return x.sin() - exp_program = export_for_training(M(), (torch.randn(4, 4),), strict=True) + exp_program = export(M(), (torch.randn(4, 4),), strict=True) output_buffer = io.BytesIO() # Tests that example forward arg names are preserved when saving and loading module. @@ -241,7 +239,7 @@ def forward(self, x): inp = (torch.ones(10),) # Module will only be able to roundtrip if metadata # can be correctly parsed. - ep = export_for_training(MyModule(), inp, strict=True) + ep = export(MyModule(), inp, strict=True) buffer = io.BytesIO() save(ep, buffer) loaded_ep = load(buffer) @@ -282,7 +280,7 @@ def forward(self, x): return h + out_c inp = (torch.ones(10),) - ep = export_for_training(Foo(), inp, strict=True) + ep = export(Foo(), inp, strict=True) buffer = io.BytesIO() save(ep, buffer) loaded_ep = load(buffer) @@ -324,7 +322,7 @@ def forward(self, x): # Check that module can be roundtripped, thereby confirming proper deserialization. 
inp = (torch.ones(10),) - ep = export_for_training(MyModule(), inp, strict=True) + ep = export(MyModule(), inp, strict=True) buffer = io.BytesIO() save(ep, buffer) loaded_ep = load(buffer) @@ -347,7 +345,7 @@ def forward(self, x, w, b): eps=1e-5, ) - exported_module = export_for_training( + exported_module = export( MyModule(), ( torch.ones([512, 512], requires_grad=True), @@ -391,7 +389,7 @@ def forward(self, a, b, c) -> torch.Tensor: "b": {1: dim1_bc}, "c": {0: dim0_ac, 1: dim1_bc}, } - exported_module = export_for_training( + exported_module = export( DynamicShapeSimpleModel(), inputs, dynamic_shapes=dynamic_shapes, @@ -455,7 +453,7 @@ def forward(self, a, b, c) -> torch.Tensor: "b": {1: dim1_bc}, "c": {0: dim0_ac, 1: dim1_bc}, } - exported_module = export_for_training( + exported_module = export( DynamicShapeSimpleModel(), inputs, dynamic_shapes=dynamic_shapes, @@ -485,9 +483,7 @@ def forward(self, x): return torch.split(x, 2) input = torch.arange(10.0).reshape(5, 2) - exported_module = export_for_training( - MyModule(), (input,), strict=True - ).run_decompositions() + exported_module = export(MyModule(), (input,), strict=True).run_decompositions() serialized = ExportedProgramSerializer().serialize(exported_module) node = serialized.exported_program.graph_module.graph.nodes[-1] @@ -550,7 +546,7 @@ def __init__(self) -> None: def forward(self, x): return torch.ops.aten.var_mean.correction(x, [1])[0] - exported_module = export_for_training( + exported_module = export( MyModule(), (torch.ones([512, 512], requires_grad=True),), strict=True ).run_decompositions() @@ -571,7 +567,7 @@ class M(torch.nn.Module): def forward(self, x): return x + x - ep = export_for_training( + ep = export( M(), (torch.randn(4),), dynamic_shapes=({0: Dim("temp")},), strict=True ) @@ -720,7 +716,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: f = Foo() x, _ = torch.sort(torch.randn(3, 4)) - exported_module = export_for_training(f, (x,), strict=True).run_decompositions() + exported_module = export(f, (x,), strict=True).run_decompositions() serialized = ExportedProgramSerializer().serialize(exported_module) node = serialized.exported_program.graph_module.graph.nodes[-1] @@ -738,9 +734,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: b = x + y return b + a - ep = export_for_training( - Module(), (torch.randn(3, 2), torch.randn(3, 2)), strict=True - ) + ep = export(Module(), (torch.randn(3, 2), torch.randn(3, 2)), strict=True) s = ExportedProgramSerializer().serialize(ep) c = canonicalize(s.exported_program) g = c.graph_module.graph @@ -754,7 +748,7 @@ class M(torch.nn.Module): def forward(self, x): return torch.ops.aten.sum.dim_IntList(x, []) - ep = torch.export.export_for_training(M(), (torch.randn(3, 2),), strict=True) + ep = torch.export.export(M(), (torch.randn(3, 2),), strict=True) serialized = ExportedProgramSerializer().serialize(ep) for node in serialized.exported_program.graph_module.graph.nodes: if "aten.sum.dim_IntList" in node.target: @@ -1024,7 +1018,7 @@ def _deepcopy_inputs(inputs): def _check_graph(pre_dispatch): if pre_dispatch: - ep = torch.export.export_for_training( + ep = torch.export.export( fn, _deepcopy_inputs(inputs), {}, @@ -1574,7 +1568,7 @@ def forward(self, x): a = a * 2 return a, b - ep = torch.export.export_for_training(M(), (torch.ones(3),), strict=True) + ep = torch.export.export(M(), (torch.ones(3),), strict=True) # insert another getitem node for node in ep.graph.nodes: @@ -1720,7 +1714,7 @@ def __init__(self) -> None: def forward(self): return 
self.p * self.p - ep = torch.export.export_for_training(M(), (), strict=True) + ep = torch.export.export(M(), (), strict=True) ep._example_inputs = None roundtrip_ep = deserialize(serialize(ep)) self.assertTrue(torch.allclose(ep.module()(), roundtrip_ep.module()())) @@ -1762,7 +1756,7 @@ def forward(self, x): return x + x f = Module() - ep = export_for_training(f, (torch.randn(1, 3),), strict=True) + ep = export(f, (torch.randn(1, 3),), strict=True) serialized_program = ExportedProgramSerializer().serialize(ep) serialized_program.exported_program.schema_version.major = -1 @@ -1798,7 +1792,7 @@ def forward(self, x): y = self.linear(y) return y - ep = export_for_training(Module(), inp, strict=True) + ep = export(Module(), inp, strict=True) buffer = io.BytesIO() save(ep, buffer) @@ -1816,7 +1810,7 @@ def forward(self, x): f = Foo() inp = (torch.randn(2, 2),) - ep = export_for_training(f, inp, strict=True) + ep = export(f, inp, strict=True) with tempfile.NamedTemporaryFile(suffix=".pt2") as f: save(ep, f.name) @@ -1833,7 +1827,7 @@ def forward(self, x, y): f = Foo() inp = (torch.tensor([6]), torch.tensor([7])) - ep = export_for_training(f, inp, strict=True) + ep = export(f, inp, strict=True) with TemporaryFileName(suffix=".pt2") as fname: path = Path(fname) @@ -1851,7 +1845,7 @@ def forward(self, x): f = Foo() - ep = export_for_training(f, inp, strict=True) + ep = export(f, inp, strict=True) buffer = io.BytesIO() save(ep, buffer, extra_files={"extra.txt": "moo"}) @@ -1872,7 +1866,7 @@ def forward(self, x): f = Foo() - ep = export_for_training(f, (torch.randn(1, 3),), strict=True) + ep = export(f, (torch.randn(1, 3),), strict=True) with self.assertRaisesRegex( ValueError, r"Saved archive version -1 does not match our current" @@ -1908,7 +1902,7 @@ def forward(self, x): list_tensor = [torch.tensor(3), torch.tensor(4)] return x + self.a + list_tensor[0] + list_tensor[1] - ep = export_for_training(Foo(), (torch.tensor(1),), strict=True) + ep = export(Foo(), (torch.tensor(1),), strict=True) buffer = io.BytesIO() save(ep, buffer) buffer.seek(0) @@ -1934,7 +1928,7 @@ def forward(self, x): f = Foo() inputs = (torch.zeros(4, 4),) - ep = export_for_training(f, inputs, strict=True) + ep = export(f, inputs, strict=True) # Replace one of the values with an instance of our custom class for node in ep.graph.nodes: @@ -1988,7 +1982,7 @@ def forward(self, x): inputs = (torch.zeros(2, 3),) with enable_torchbind_tracing(): - ep = export_for_training(f, inputs, strict=False) + ep = export(f, inputs, strict=False) serialized_vals = serialize(ep) ep = deserialize(serialized_vals) @@ -2008,7 +2002,7 @@ def forward(self, x): inputs = (torch.zeros(2, 3),) with enable_torchbind_tracing(): - ep = export_for_training(f, inputs, strict=False) + ep = export(f, inputs, strict=False) serialized_vals = serialize(ep) ep = deserialize(serialized_vals) @@ -2043,7 +2037,7 @@ def forward(self, x): f = Foo() inputs = (torch.zeros(4, 4),) - ep = export_for_training(f, inputs, strict=True) + ep = export(f, inputs, strict=True) new_gm = copy.deepcopy(ep.graph_module) new_gm.meta["custom"] = {} @@ -2078,7 +2072,7 @@ def forward(self, x): f = Foo() inputs = (torch.ones(2, 2),) - ep = export_for_training(f, inputs, strict=True) + ep = export(f, inputs, strict=True) new_gm = copy.deepcopy(ep.graph_module) new_gm.meta["custom"] = {} @@ -2114,7 +2108,7 @@ def forward(self, x): f = Foo() inputs = (torch.zeros(4, 4),) - ep = export_for_training(f, inputs, strict=True) + ep = export(f, inputs, strict=True) new_gm = 
copy.deepcopy(ep.graph_module) new_gm.meta["custom"] = {} diff --git a/test/export/test_torchbind.py b/test/export/test_torchbind.py index f45775f09f29a..3e8d8e35ada1a 100644 --- a/test/export/test_torchbind.py +++ b/test/export/test_torchbind.py @@ -138,7 +138,7 @@ def _test_export_same_as_eager( def export_wrapper(f, args, kwargs, strict, pre_dispatch): with enable_torchbind_tracing(): if pre_dispatch: - exported_program = torch.export.export_for_training( + exported_program = torch.export.export( f, args, kwargs, strict=strict ).run_decompositions({}) else: @@ -755,7 +755,7 @@ def forward(self, tq, x): b = torch.randn(2, 2) tq.push(a) tq.push(b) - ep = torch.export.export_for_training( + ep = torch.export.export( mod, (tq, torch.randn(2, 2)), strict=False ).run_decompositions({}) self.assertExpectedInline( @@ -809,9 +809,9 @@ def forward(self, L_safe_obj_ : torch.ScriptObject): ) with enable_torchbind_tracing(): - ep = torch.export.export_for_training( - mod, (safe_obj,), strict=False - ).run_decompositions({}) + ep = torch.export.export(mod, (safe_obj,), strict=False).run_decompositions( + {} + ) self.assertExpectedInline( ep.graph_module.code.strip(), """\ @@ -1407,9 +1407,9 @@ def forward(self, obj, x): x = torch.randn(3, 1) eager_out = mod(test_obj, x) compiled_out = torch.compile(mod, backend=backend, fullgraph=True)(test_obj, x) - ep = torch.export.export_for_training( - mod, (test_obj, x), strict=False - ).run_decompositions({}) + ep = torch.export.export(mod, (test_obj, x), strict=False).run_decompositions( + {} + ) self.assertExpectedInline( ep.graph_module.code.strip(), """\ diff --git a/test/export/test_unflatten_training_ir.py b/test/export/test_unflatten_training_ir.py index 6816787eff224..677b427f37545 100644 --- a/test/export/test_unflatten_training_ir.py +++ b/test/export/test_unflatten_training_ir.py @@ -7,14 +7,14 @@ import test_unflatten # @manual=fbcode//caffe2/test:test_export-library import testing # @manual=fbcode//caffe2/test:test_export-library -from torch.export import export_for_training +from torch.export import export test_classes = {} def mocked_training_ir_export(*args, **kwargs): - return export_for_training(*args, **kwargs, strict=True) + return export(*args, **kwargs, strict=True) def make_dynamic_cls(cls): diff --git a/test/export/test_verifier.py b/test/export/test_verifier.py index 5d3cfd5646377..f6e49791edf84 100644 --- a/test/export/test_verifier.py +++ b/test/export/test_verifier.py @@ -6,7 +6,7 @@ from torch import Tensor from torch._dynamo.eval_frame import is_dynamo_supported from torch._export.verifier import SpecViolationError, Verifier -from torch.export import export_for_training +from torch.export import export from torch.export.exported_program import InputKind, InputSpec, TensorArgument from torch.testing._internal.common_utils import IS_WINDOWS, run_tests, TestCase @@ -20,7 +20,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: f = Foo() - ep = export_for_training(f, (torch.randn(100), torch.randn(100)), strict=True) + ep = export(f, (torch.randn(100), torch.randn(100)), strict=True) verifier = Verifier() verifier.check(ep) @@ -47,7 +47,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: f = Foo() - ep = export_for_training( + ep = export( f, (torch.randn(100), torch.randn(100)), strict=True ).run_decompositions({}) for node in ep.graph.nodes: @@ -72,7 +72,7 @@ def false_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: f = Foo() - ep = export_for_training(f, (torch.randn(3, 3), 
torch.randn(3, 3)), strict=True) + ep = export(f, (torch.randn(3, 3), torch.randn(3, 3)), strict=True) verifier = Verifier() verifier.check(ep) @@ -91,7 +91,7 @@ def false_fn(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: f = Foo() - ep = export_for_training( + ep = export( f, (torch.randn(3, 3), torch.randn(3, 3)), strict=True ).run_decompositions({}) for node in ep.graph_module.true_graph_0.graph.nodes: @@ -111,7 +111,7 @@ def __init__(self) -> None: def forward(self, x: Tensor) -> Tensor: return self.linear(x) - ep = export_for_training(M(), (torch.randn(10, 10),), strict=True) + ep = export(M(), (torch.randn(10, 10),), strict=True) ep.validate() def test_ep_verifier_invalid_param(self) -> None: @@ -125,7 +125,7 @@ def __init__(self) -> None: def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return x + y + self.a - ep = export_for_training(M(), (torch.randn(100), torch.randn(100)), strict=True) + ep = export(M(), (torch.randn(100), torch.randn(100)), strict=True) # Parameter doesn't exist in the state dict ep.graph_signature.input_specs[0] = InputSpec( @@ -150,7 +150,7 @@ def __init__(self) -> None: def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return x + y + self.a - ep = export_for_training(M(), (torch.randn(100), torch.randn(100)), strict=True) + ep = export(M(), (torch.randn(100), torch.randn(100)), strict=True) # Buffer doesn't exist in the state dict ep.graph_signature.input_specs[0] = InputSpec( @@ -182,9 +182,7 @@ def forward(self, x1, x2): self.my_buffer2.add_(1.0) return output - ep = export_for_training( - M(), (torch.tensor(5.0), torch.tensor(6.0)), strict=True - ) + ep = export(M(), (torch.tensor(5.0), torch.tensor(6.0)), strict=True) ep.validate() def test_ep_verifier_invalid_output(self) -> None: @@ -207,9 +205,7 @@ def forward(self, x1, x2): self.my_buffer2.add_(1.0) return output - ep = export_for_training( - M(), (torch.tensor(5.0), torch.tensor(6.0)), strict=True - ) + ep = export(M(), (torch.tensor(5.0), torch.tensor(6.0)), strict=True) output_node = list(ep.graph.nodes)[-1] output_node.args = ( diff --git a/test/fx/test_matcher_utils.py b/test/fx/test_matcher_utils.py index 604de73fcd880..d046fccf1f50e 100644 --- a/test/fx/test_matcher_utils.py +++ b/test/fx/test_matcher_utils.py @@ -6,7 +6,7 @@ import torch import torch.nn.functional as F -from torch.export import export_for_training +from torch.export import export from torch.fx import symbolic_trace from torch.fx.experimental.proxy_tensor import make_fx @@ -172,7 +172,7 @@ def pattern(x, weight): torch.randn(1, 3, 3, 3) * 10, torch.randn(3, 3, 3, 3), ) - pattern_gm = export_for_training( + pattern_gm = export( WrapperModule(pattern), example_inputs, strict=True ).module() before_split_res = pattern_gm(*example_inputs) @@ -203,11 +203,11 @@ def pattern(x, weight): torch.randn(1, 3, 3, 3) * 10, torch.randn(3, 3, 3, 3), ) - pattern_gm = export_for_training( + pattern_gm = export( WrapperModule(pattern), example_inputs, strict=True ).module() matcher = SubgraphMatcherWithNameNodeMap(pattern_gm) - target_gm = export_for_training( + target_gm = export( WrapperModule(target_graph), example_inputs, strict=True ).module() internal_matches = matcher.match(target_gm.graph) @@ -248,11 +248,9 @@ def forward(self, x): return linear, {"linear": linear, "x": x} example_inputs = (torch.randn(3, 5),) - pattern_gm = export_for_training( - Pattern(), example_inputs, strict=True - ).module() + pattern_gm = export(Pattern(), example_inputs, strict=True).module() matcher = 
SubgraphMatcherWithNameNodeMap(pattern_gm) - target_gm = export_for_training(M(), example_inputs, strict=True).module() + target_gm = export(M(), example_inputs, strict=True).module() internal_matches = matcher.match(target_gm.graph) for internal_match in internal_matches: name_node_map = internal_match.name_node_map diff --git a/test/inductor/test_aot_inductor.py b/test/inductor/test_aot_inductor.py index 917a914a5359e..3fcf4332c2257 100644 --- a/test/inductor/test_aot_inductor.py +++ b/test/inductor/test_aot_inductor.py @@ -34,7 +34,7 @@ from torch._utils_internal import full_aoti_runtime_assert from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer -from torch.export import Dim, export, export_for_training +from torch.export import Dim, export from torch.export.pt2_archive._package import load_pt2 from torch.testing import FileCheck from torch.testing._internal import common_utils @@ -2525,9 +2525,7 @@ def forward(self, x): config.patch({"freezing": True, "aot_inductor.force_mmap_weights": True}), torch.no_grad(), ): - exported_model = export_for_training( - model, example_inputs, strict=True - ).module() + exported_model = export(model, example_inputs, strict=True).module() quantizer = X86InductorQuantizer() quantizer.set_global( xiq.get_default_x86_inductor_quantization_config(reduce_range=True) diff --git a/test/quantization/pt2e/test_duplicate_dq.py b/test/quantization/pt2e/test_duplicate_dq.py index b830763d4ba86..a4f75588f48c2 100644 --- a/test/quantization/pt2e/test_duplicate_dq.py +++ b/test/quantization/pt2e/test_duplicate_dq.py @@ -24,7 +24,7 @@ OP_TO_ANNOTATOR, QuantizationConfig, ) -from torch.export import export_for_training +from torch.export import export from torch.testing._internal.common_quantization import QuantizationTestCase from torch.testing._internal.common_utils import IS_WINDOWS, raise_on_run_directly @@ -101,7 +101,7 @@ def _test_duplicate_dq( # program capture m = copy.deepcopy(m_eager) - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) # Calibrate diff --git a/test/quantization/pt2e/test_metadata_porting.py b/test/quantization/pt2e/test_metadata_porting.py index fe9e1b295561b..88e1c9cad7cba 100644 --- a/test/quantization/pt2e/test_metadata_porting.py +++ b/test/quantization/pt2e/test_metadata_porting.py @@ -102,7 +102,7 @@ def _test_metadata_porting( # program capture m = copy.deepcopy(m_eager) - m = torch.export.export_for_training(m, example_inputs, strict=True).module() + m = torch.export.export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) # Calibrate diff --git a/test/quantization/pt2e/test_numeric_debugger.py b/test/quantization/pt2e/test_numeric_debugger.py index 53c7939411631..510dfabfbcc17 100644 --- a/test/quantization/pt2e/test_numeric_debugger.py +++ b/test/quantization/pt2e/test_numeric_debugger.py @@ -19,7 +19,7 @@ get_symmetric_quantization_config, XNNPACKQuantizer, ) -from torch.export import export_for_training +from torch.export import export from torch.testing._internal.common_quantization import TestHelperModules from torch.testing._internal.common_utils import ( IS_WINDOWS, @@ -86,7 +86,7 @@ def _extract_debug_handles_with_prev_decomp_op_from_node(node): def test_simple(self): m = TestHelperModules.Conv2dThenConv1d() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, 
strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) self._assert_each_node_has_debug_handle(ep) debug_handle_map = self._extract_debug_handles(ep) @@ -96,7 +96,7 @@ def test_simple(self): def test_control_flow(self): m = TestHelperModules.ControlFlow() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) self._assert_each_node_has_debug_handle(ep) @@ -107,7 +107,7 @@ def test_control_flow(self): def test_quantize_pt2e_preserve_handle(self): m = TestHelperModules.Conv2dThenConv1d() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) m = ep.module() @@ -167,14 +167,14 @@ def test_deepcopy_preserve_handle(self): def test_re_export_preserve_handle(self): m = TestHelperModules.Conv2dThenConv1d() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) m = ep.module() self._assert_each_node_has_debug_handle(ep) debug_handle_map_ref = self._extract_debug_handles(ep) - ep_reexport = export_for_training(m, example_inputs, strict=True) + ep_reexport = export(m, example_inputs, strict=True) self._assert_each_node_has_debug_handle(ep_reexport) debug_handle_map = self._extract_debug_handles(ep_reexport) @@ -184,7 +184,7 @@ def test_re_export_preserve_handle(self): def test_run_decompositions_same_handle_id(self): m = TestHelperModules.Conv2dThenConv1d() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) self._assert_each_node_has_debug_handle(ep) @@ -209,7 +209,7 @@ def test_run_decompositions_map_handle_to_new_nodes(self): for m in test_models: example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) self._assert_each_node_has_debug_handle(ep) @@ -232,7 +232,7 @@ def test_run_decompositions_map_handle_to_new_nodes(self): def test_prepare_for_propagation_comparison(self): m = TestHelperModules.Conv2dThenConv1d() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) m = ep.module() m_logger = prepare_for_propagation_comparison(m) @@ -249,7 +249,7 @@ def test_prepare_for_propagation_comparison(self): def test_extract_results_from_loggers(self): m = TestHelperModules.Conv2dThenConv1d() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) m = ep.module() m_ref_logger = prepare_for_propagation_comparison(m) @@ -274,7 +274,7 @@ def test_extract_results_from_loggers(self): def test_extract_results_from_loggers_list_output(self): m = TestHelperModules.Conv2dWithSplit() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) m = ep.module() m_ref_logger = prepare_for_propagation_comparison(m) @@ -304,7 +304,7 @@ def test_extract_results_from_loggers_list_output(self): def test_added_node_gets_unique_id(self) -> None: m = 
TestHelperModules.Conv2dThenConv1d() example_inputs = m.example_inputs() - ep = export_for_training(m, example_inputs, strict=True) + ep = export(m, example_inputs, strict=True) generate_numeric_debug_handle(ep) ref_handles = self._extract_debug_handles(ep) ref_counter = Counter(ref_handles.values()) diff --git a/test/quantization/pt2e/test_quantize_pt2e.py b/test/quantization/pt2e/test_quantize_pt2e.py index f9780dbf7b3df..8c539224fce4e 100644 --- a/test/quantization/pt2e/test_quantize_pt2e.py +++ b/test/quantization/pt2e/test_quantize_pt2e.py @@ -39,7 +39,7 @@ OP_TO_ANNOTATOR, QuantizationConfig, ) -from torch.export import export_for_training +from torch.export import export from torch.fx import Node from torch.testing._internal.common_quantization import ( NodeSpec as ns, @@ -767,7 +767,7 @@ def validate(self, model: torch.fx.GraphModule) -> None: example_inputs = (torch.randn(1, 3, 5, 5), torch.randn(1, 3, 5, 5)) # program capture - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, BackendAQuantizer()) # make sure the two observers for input are shared conv_output_obs = [] @@ -827,7 +827,7 @@ def _test_transitive_sharing_with_cat_helper(self, quantizer): ) # program capture - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) m(*example_inputs) # make sure the two input observers and output are shared @@ -1146,7 +1146,7 @@ def validate(self, model: torch.fx.GraphModule) -> None: ) # program capture - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() quantizer = BackendAQuantizer() m = prepare_pt2e(m, quantizer) m(*example_inputs) @@ -1296,7 +1296,7 @@ def validate(self, model: torch.fx.GraphModule) -> None: m = M().eval() example_inputs = torch.randn(1, 2, 3, 3) - m = export_for_training(m, (example_inputs,), strict=True).module() + m = export(m, (example_inputs,), strict=True).module() with self.assertRaises(Exception): m = prepare_pt2e(m, BackendAQuantizer()) @@ -1419,7 +1419,7 @@ def forward(self, x): quantizer.set_global(operator_config) example_inputs = (torch.randn(2, 2),) m = M().eval() - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() weight_meta = None for n in m.graph.nodes: if ( @@ -1506,7 +1506,7 @@ def forward(self, x): m = M().eval() quantizer = TestQuantizer() example_inputs = (torch.randn(1, 2, 3, 3),) - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) m(*example_inputs) node_occurrence = { @@ -1557,7 +1557,7 @@ def forward(self, x, y, z): torch.randn(1, 2, 3, 3), torch.randn(1, 2, 3, 3), ) - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) m(*example_inputs) node_occurrence = { @@ -1812,7 +1812,7 @@ def forward(self, x): example_inputs = (torch.randn(1),) m = M().train() - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() if inplace: target = torch.ops.aten.dropout_.default else: @@ -1877,7 +1877,7 @@ def forward(self, x): m = M().train() example_inputs = (torch.randn(1, 3, 3, 3),) bn_train_op, bn_eval_op = self._get_bn_train_eval_ops() - m = export_for_training(m, example_inputs, 
strict=True).module() + m = export(m, example_inputs, strict=True).module() # Assert that batch norm op exists and is in train mode bn_node = self._get_node(m, bn_train_op) @@ -1908,7 +1908,7 @@ def test_disallow_eval_train(self): m.train() # After export: this is not OK - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() with self.assertRaises(NotImplementedError): m.eval() with self.assertRaises(NotImplementedError): @@ -1949,7 +1949,7 @@ def forward(self, x): m = M().train() example_inputs = (torch.randn(1, 3, 3, 3),) bn_train_op, bn_eval_op = self._get_bn_train_eval_ops() - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() def _assert_ops_are_correct(m: torch.fx.GraphModule, train: bool): targets = [n.target for n in m.graph.nodes] @@ -2015,7 +2015,7 @@ def forward(self, x): m = M().train() example_inputs = (torch.randn(1, 3, 3, 3),) - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() torch.ao.quantization.allow_exported_model_train_eval(m) # Mock m.recompile() to count how many times it's been called @@ -2047,7 +2047,7 @@ def _fake_recompile(): def test_model_is_exported(self): m = TestHelperModules.ConvWithBNRelu(relu=True) example_inputs = (torch.rand(3, 3, 5, 5),) - exported_gm = export_for_training(m, example_inputs, strict=True).module() + exported_gm = export(m, example_inputs, strict=True).module() fx_traced_gm = torch.fx.symbolic_trace(m, example_inputs) self.assertTrue( torch.ao.quantization.pt2e.export_utils.model_is_exported(exported_gm) @@ -2065,9 +2065,7 @@ def test_reentrant(self): quantizer = XNNPACKQuantizer().set_global( get_symmetric_quantization_config(is_per_channel=True, is_qat=True) ) - m.conv_bn_relu = export_for_training( - m.conv_bn_relu, example_inputs, strict=True - ).module() + m.conv_bn_relu = export(m.conv_bn_relu, example_inputs, strict=True).module() m.conv_bn_relu = prepare_qat_pt2e(m.conv_bn_relu, quantizer) m(*example_inputs) m.conv_bn_relu = convert_pt2e(m.conv_bn_relu) @@ -2075,7 +2073,7 @@ def test_reentrant(self): quantizer = XNNPACKQuantizer().set_module_type( torch.nn.Linear, get_symmetric_quantization_config(is_per_channel=False) ) - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) m = convert_pt2e(m) @@ -2247,7 +2245,7 @@ def test_speed(self): def dynamic_quantize_pt2e(model, example_inputs): torch._dynamo.reset() - model = export_for_training(model, example_inputs, strict=True).module() + model = export(model, example_inputs, strict=True).module() # Per channel quantization for weight # Dynamic quantization for activation # Please read a detail: https://fburl.com/code/30zds51q @@ -2462,7 +2460,7 @@ def forward(self, x): example_inputs = (torch.randn(1, 3, 5, 5),) m = M() - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() quantizer = XNNPACKQuantizer().set_global( get_symmetric_quantization_config(), ) @@ -2544,7 +2542,7 @@ def prepare_obs_or_fq_callback( edge_or_node_to_obs_or_fq[x] = new_observer example_inputs = (torch.rand(1, 32, 16, 16),) - gm = export_for_training(Model().eval(), example_inputs, strict=True).module() + gm = export(Model().eval(), example_inputs, strict=True).module() gm = prepare_pt2e(gm, BackendAQuantizer()) gm = convert_pt2e(gm) for n in 
gm.graph.nodes: @@ -2571,9 +2569,7 @@ def check_nn_module(node): "ConvWithBNRelu" in node.meta["nn_module_stack"]["L__self__"][1] ) - m.conv_bn_relu = export_for_training( - m.conv_bn_relu, example_inputs, strict=True - ).module() + m.conv_bn_relu = export(m.conv_bn_relu, example_inputs, strict=True).module() for node in m.conv_bn_relu.graph.nodes: if node.op not in ["placeholder", "output", "get_attr"]: check_nn_module(node) diff --git a/test/quantization/pt2e/test_quantize_pt2e_qat.py b/test/quantization/pt2e/test_quantize_pt2e_qat.py index 98682dc14e079..ca80439bbf34c 100644 --- a/test/quantization/pt2e/test_quantize_pt2e_qat.py +++ b/test/quantization/pt2e/test_quantize_pt2e_qat.py @@ -34,7 +34,7 @@ get_symmetric_quantization_config, XNNPACKQuantizer, ) -from torch.export import export_for_training +from torch.export import export from torch.testing._internal.common_cuda import TEST_CUDA from torch.testing._internal.common_quantization import ( NodeSpec as ns, @@ -140,9 +140,7 @@ def _verify_symmetric_xnnpack_qat_numerics_helper( is_per_channel=is_per_channel, is_qat=True ) ) - model_pt2e = export_for_training( - model_pt2e, example_inputs, strict=True - ).module() + model_pt2e = export(model_pt2e, example_inputs, strict=True).module() model_pt2e = prepare_qat_pt2e(model_pt2e, quantizer) torch.manual_seed(MANUAL_SEED) after_prepare_result_pt2e = model_pt2e(*example_inputs) @@ -229,7 +227,7 @@ def _verify_symmetric_xnnpack_qat_graph_helper( quantizer.set_global( get_symmetric_quantization_config(is_per_channel, is_qat=True) ) - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_qat_pt2e(m, quantizer) m(*example_inputs) @@ -618,7 +616,7 @@ def forward(self, x): m = M(self.conv_class, self.bn_class, backbone) quantizer = XNNPACKQuantizer() quantizer.set_global(get_symmetric_quantization_config(is_qat=True)) - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_qat_pt2e(m, quantizer) m(*example_inputs) m = convert_pt2e(m) @@ -676,7 +674,7 @@ def get_source_fn(node: torch.fx.Node): def test_qat_conv_bn_bias_derived_qspec(self): m = self._get_conv_bn_model() example_inputs = self.example_inputs - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() quantizer = ConvBnDerivedBiasQuantizer() m = prepare_qat_pt2e(m, quantizer) m(*example_inputs) @@ -723,7 +721,7 @@ def test_qat_conv_bn_bias_derived_qspec(self): def test_qat_per_channel_weight_custom_dtype(self): m = self._get_conv_bn_model() example_inputs = self.example_inputs - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() quantizer = ConvBnInt32WeightQuantizer() m = prepare_qat_pt2e(m, quantizer) m(*example_inputs) @@ -777,7 +775,7 @@ def test_qat_conv_transpose_bn_relu(self): def test_qat_conv_bn_per_channel_weight_bias(self): m = self._get_conv_bn_model() example_inputs = self.example_inputs - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() quantizer = ConvBnDerivedBiasQuantizer(is_per_channel=True) m = prepare_qat_pt2e(m, quantizer) m(*example_inputs) @@ -834,7 +832,7 @@ def test_fold_bn_erases_bn_node(self): it into conv in `convert_pt2e` even in train mode. 
""" m = self._get_conv_bn_model(has_conv_bias=False, has_bn=True, has_relu=False) - m = export_for_training(m, self.example_inputs, strict=True).module() + m = export(m, self.example_inputs, strict=True).module() quantizer = XNNPACKQuantizer() quantizer.set_global( get_symmetric_quantization_config(is_per_channel=False, is_qat=True), @@ -850,7 +848,7 @@ def test_fold_bn_erases_add_node(self): Test that batch norm stat tracking (which results in an add_ tensor) is removed when folding batch norm. """ m = self._get_conv_bn_model(has_conv_bias=False, has_bn=True, has_relu=False) - m = export_for_training(m, self.example_inputs, strict=True).module() + m = export(m, self.example_inputs, strict=True).module() def _has_add_(graph): for node in graph.nodes: @@ -1115,9 +1113,7 @@ def _prepare_qat_linears(self, model): in_channels = child.linear1.weight.size(1) example_input = (torch.rand((1, in_channels)),) - traced_child = export_for_training( - child, example_input, strict=True - ).module() + traced_child = export(child, example_input, strict=True).module() quantizer = XNNPACKQuantizer() quantization_config = get_symmetric_quantization_config( is_per_channel=True, is_qat=True @@ -1148,7 +1144,7 @@ def test_mixing_qat_ptq(self): self._convert_qat_linears(model) model(*example_inputs) - model_pt2e = export_for_training(model, example_inputs, strict=True).module() + model_pt2e = export(model, example_inputs, strict=True).module() quantizer = XNNPACKQuantizer() quantizer.set_module_type(torch.nn.Linear, None) diff --git a/test/quantization/pt2e/test_representation.py b/test/quantization/pt2e/test_representation.py index 5c5a7cce505b6..1c97dd6a73862 100644 --- a/test/quantization/pt2e/test_representation.py +++ b/test/quantization/pt2e/test_representation.py @@ -10,7 +10,7 @@ get_symmetric_quantization_config, XNNPACKQuantizer, ) -from torch.export import export_for_training +from torch.export import export from torch.testing._internal.common_quantization import ( NodeSpec as ns, QuantizationTestCase, @@ -34,7 +34,7 @@ def _test_representation( ) -> torch.nn.Module: # resetting dynamo cache torch._dynamo.reset() - model = export_for_training(model, example_inputs, strict=True).module() + model = export(model, example_inputs, strict=True).module() model_copy = copy.deepcopy(model) model = prepare_pt2e(model, quantizer) diff --git a/test/quantization/pt2e/test_x86inductor_quantizer.py b/test/quantization/pt2e/test_x86inductor_quantizer.py index 1f1020b9bd41c..6c83ab1a869ed 100644 --- a/test/quantization/pt2e/test_x86inductor_quantizer.py +++ b/test/quantization/pt2e/test_x86inductor_quantizer.py @@ -17,7 +17,7 @@ QUANT_ANNOTATION_KEY, X86InductorQuantizer, ) -from torch.export import export_for_training +from torch.export import export from torch.testing._internal.common_quantization import ( NodeSpec as ns, QuantizationTestCase, @@ -668,7 +668,7 @@ def _test_quantizer( # program capture m = copy.deepcopy(m_eager) - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() # QAT Model failed to deepcopy export_model = m if is_qat else copy.deepcopy(m) @@ -2344,7 +2344,7 @@ def forward(self, x): ) example_inputs = (torch.randn(2, 2),) m = M().eval() - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) # Use a linear count instead of names because the names might change, but # the order should be the same. 
diff --git a/test/quantization/pt2e/test_xnnpack_quantizer.py b/test/quantization/pt2e/test_xnnpack_quantizer.py index 37bac5c8f51f9..3baec3f8004b1 100644 --- a/test/quantization/pt2e/test_xnnpack_quantizer.py +++ b/test/quantization/pt2e/test_xnnpack_quantizer.py @@ -29,7 +29,7 @@ get_symmetric_quantization_config, XNNPACKQuantizer, ) -from torch.export import export_for_training +from torch.export import export from torch.testing._internal.common_quantization import ( NodeSpec as ns, PT2EQuantizationTestCase, @@ -362,7 +362,7 @@ def forward(self, x): ) example_inputs = (torch.randn(2, 2),) m = M().eval() - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) # Use a linear count instead of names because the names might change, but # the order should be the same. @@ -498,7 +498,7 @@ def test_propagate_annotation(self): example_inputs = (torch.randn(1, 3, 5, 5),) # program capture - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() m = prepare_pt2e(m, quantizer) m(*example_inputs) @@ -763,9 +763,7 @@ def forward(self, input_tensor, hidden_tensor): model_fx = _convert_to_reference_decomposed_fx(model_fx) with torchdynamo.config.patch(allow_rnn=True): - model_graph = export_for_training( - model_graph, example_inputs, strict=True - ).module() + model_graph = export(model_graph, example_inputs, strict=True).module() quantizer = XNNPACKQuantizer() quantization_config = get_symmetric_quantization_config( is_per_channel=False, is_dynamic=False @@ -825,9 +823,7 @@ def forward(self, input_tensor, hidden_tensor): model_fx = _convert_to_reference_decomposed_fx(model_fx) with torchdynamo.config.patch(allow_rnn=True): - model_graph = export_for_training( - model_graph, example_inputs, strict=True - ).module() + model_graph = export(model_graph, example_inputs, strict=True).module() quantizer = XNNPACKQuantizer() quantization_config = get_symmetric_quantization_config( is_per_channel=False, is_dynamic=False @@ -1035,7 +1031,7 @@ def test_resnet18(self): m = torchvision.models.resnet18().eval() m_copy = copy.deepcopy(m) # program capture - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() quantizer = XNNPACKQuantizer() quantization_config = get_symmetric_quantization_config(is_per_channel=True) diff --git a/test/test_model_exports_to_core_aten.py b/test/test_model_exports_to_core_aten.py index 3d1c25939ec4b..60ec7ec54daf9 100644 --- a/test/test_model_exports_to_core_aten.py +++ b/test/test_model_exports_to_core_aten.py @@ -27,9 +27,7 @@ def test_vit_aten_export(self): m = m.eval() input_shape = (1, 3, 224, 224) example_inputs = (torch.randn(input_shape),) - m = torch.export.export_for_training( - m, copy.deepcopy(example_inputs), strict=True - ).module() + m = torch.export.export(m, copy.deepcopy(example_inputs), strict=True).module() m(*example_inputs) m = export.export(m, copy.deepcopy(example_inputs)) ops = _get_ops_list(m.graph_module) diff --git a/torch/ao/quantization/pt2e/lowering.py b/torch/ao/quantization/pt2e/lowering.py index 587cee22560df..742549dedcf8d 100644 --- a/torch/ao/quantization/pt2e/lowering.py +++ b/torch/ao/quantization/pt2e/lowering.py @@ -50,7 +50,7 @@ def _node_replace(m): # type: ignore[no-untyped-def] m.recompile() lowered_model = ( - torch.export.export_for_training(model, example_inputs, strict=True) + torch.export.export(model, 
example_inputs, strict=True) .run_decompositions(_post_autograd_decomp_table()) .module() ) diff --git a/torch/ao/quantization/pt2e/utils.py b/torch/ao/quantization/pt2e/utils.py index 699a4c384837d..ae938fec4c7f9 100644 --- a/torch/ao/quantization/pt2e/utils.py +++ b/torch/ao/quantization/pt2e/utils.py @@ -356,7 +356,7 @@ def _get_aten_graph_module_for_pattern( [x.cuda() if isinstance(x, torch.Tensor) else x for x in example_inputs] ) - aten_pattern = torch.export.export_for_training( + aten_pattern = torch.export.export( pattern, # type: ignore[arg-type] example_inputs, kwargs, diff --git a/torch/distributed/pipelining/_IR.py b/torch/distributed/pipelining/_IR.py index 3dfb0fe25c4cd..ab648a97a7ee9 100644 --- a/torch/distributed/pipelining/_IR.py +++ b/torch/distributed/pipelining/_IR.py @@ -1002,9 +1002,7 @@ def _trace_with_export( ) -> ExportedProgram: logger.info("Tracing model ...") try: - ep = torch.export.export_for_training( - mod, example_args, example_kwargs, strict=True - ) + ep = torch.export.export(mod, example_args, example_kwargs, strict=True) except Exception as e: raise RuntimeError( "It seems that we cannot capture your model as a full graph. " diff --git a/torch/testing/_internal/common_quantization.py b/torch/testing/_internal/common_quantization.py index 600848b80a7e8..8040d647216f3 100644 --- a/torch/testing/_internal/common_quantization.py +++ b/torch/testing/_internal/common_quantization.py @@ -58,7 +58,7 @@ XNNPACKQuantizer, ) -from torch.export import export_for_training +from torch.export import export from torch.jit.mobile import _load_for_lite_interpreter from torch.testing._internal.common_quantized import override_quantized_engine from torch.testing._internal.common_utils import TEST_WITH_ROCM, TestCase @@ -1513,7 +1513,7 @@ def _test_quantizer( {0: torch.export.Dim("dim")} if i == 0 else None for i in range(len(example_inputs)) ) - m = export_for_training( + m = export( m, example_inputs, dynamic_shapes=dynamic_shapes if export_with_dynamic_shape else None, @@ -1554,7 +1554,7 @@ def _test_quantizer( m_fx = _convert_to_reference_decomposed_fx( m_fx, backend_config=backend_config ) - m_fx = export_for_training( + m_fx = export( m_fx, example_inputs, dynamic_shapes=dynamic_shapes if export_with_dynamic_shape else None, @@ -1578,7 +1578,7 @@ def _quantize(self, m, quantizer, example_inputs, is_qat: bool = False): # resetting dynamo cache torch._dynamo.reset() - m = export_for_training(m, example_inputs, strict=True).module() + m = export(m, example_inputs, strict=True).module() if is_qat: m = prepare_qat_pt2e(m, quantizer) else: @@ -3183,12 +3183,15 @@ def forward(self, x): x = self.adaptive_avg_pool2d(x) return x - class ConvWithBNRelu(torch.nn.Module): def __init__(self, relu, dim=2, bn=True, bias=True, padding=0): super().__init__() convs = {1: torch.nn.Conv1d, 2: torch.nn.Conv2d, 3: torch.nn.Conv3d} - bns = {1: torch.nn.BatchNorm1d, 2: torch.nn.BatchNorm2d, 3: torch.nn.BatchNorm3d} + bns = { + 1: torch.nn.BatchNorm1d, + 2: torch.nn.BatchNorm2d, + 3: torch.nn.BatchNorm3d, + } self.conv = convs[dim](3, 3, 3, bias=bias, padding=padding) if bn: @@ -3394,7 +3397,7 @@ def get_default_quantizer(is_qat, is_dynamic, inputs): maybe_no_grad = contextlib.nullcontext() if is_qat else torch.no_grad() with maybe_no_grad: - export_model = export_for_training(mod, inputs, strict=True).module(check_guards=False) + export_model = export(mod, inputs, strict=True).module(check_guards=False) quantizer = ( quantizer if quantizer From 3d32bb114bf0d5bd0193dc40f20253635dddf080 Mon 
Sep 17 00:00:00 2001
From: atalman
Date: Wed, 10 Sep 2025 14:22:41 +0000
Subject: [PATCH 045/693] [CD] Aarch64 Fix packaging ``libarm_compute.so`` and other libraries to the aarch64 CUDA wheels (#162566)

Fixes aarch64 Linux packaging, which currently fails with the following error:
https://github.com/pytorch/vision/actions/runs/17612462583/job/50037380487#step:15:62
```
Traceback (most recent call last):
File "/__w/vision/vision/pytorch/vision/setup.py", line 13, in
import torch
File "/__w/_temp/conda_environment_17612462583/lib/python3.11/site-packages/torch/__init__.py", line 415, in
from torch._C import * # noqa: F403
^^^^^^^^^^^^^^^^^^^^^^
ImportError: libarm_compute.so: cannot open shared object file: No such file or directory
```
This is due to missing dependencies.

Current behavior:
File torch-2.10.0.dev20250910+cu130-cp310-cp310-linux_aarch64.whl is extracted
File is repackaged as torch-2.10.0.dev20250910+cu130-cp310-cp310-manylinux_2_28_aarch64.whl
File torch-2.10.0.dev20250910+cu130-cp310-cp310-linux_aarch64.whl renamed as torch-2.10.0.dev20250910+cu130-cp310-cp310-manylinux_2_28_aarch64.whl
Hence the repackaging has no effect.

This PR does the following:
File torch-2.10.0.dev20250910+cu130-cp310-cp310-linux_aarch64.whl is extracted
File torch-2.10.0.dev20250910+cu130-cp310-cp310-linux_aarch64.whl deleted
File is repackaged as torch-2.10.0.dev20250910+cu130-cp310-cp310-manylinux_2_28_aarch64.whl

After migrating from zipping the wheel to wheel pack, renaming the wheel is no longer necessary, so this PR removes the renaming step and deletes the old file.
```
2025-09-10T10:10:05.9652454Z Using nvidia libs from pypi - skipping CUDA library bundling
2025-09-10T10:10:05.9656595Z Copying to /pytorch/dist/tmp/torch/lib/libgomp.so.1
2025-09-10T10:10:05.9873843Z Copying to /pytorch/dist/tmp/torch/lib/libgfortran.so.5
2025-09-10T10:10:06.0410041Z Copying to /pytorch/dist/tmp/torch/lib/libarm_compute.so
2025-09-10T10:10:06.2869242Z Copying to /pytorch/dist/tmp/torch/lib/libarm_compute_graph.so
2025-09-10T10:10:06.4385740Z Copying to /pytorch/dist/tmp/torch/lib/libnvpl_lapack_lp64_gomp.so.0
2025-09-10T10:10:06.5461372Z Copying to /pytorch/dist/tmp/torch/lib/libnvpl_blas_lp64_gomp.so.0
2025-09-10T10:10:06.5728970Z Copying to /pytorch/dist/tmp/torch/lib/libnvpl_lapack_core.so.0
2025-09-10T10:10:06.6231872Z Copying to /pytorch/dist/tmp/torch/lib/libnvpl_blas_core.so.0
2025-09-10T10:10:14.1503110Z Updated tag from Tag: cp310-cp310-linux_aarch64
2025-09-10T10:10:14.1503482Z to Tag: cp310-cp310-manylinux_2_28_aarch64
2025-09-10T10:10:14.1503682Z
2025-09-10T10:10:41.6498892Z Repacking wheel as /pytorch/dist/torch-2.10.0.dev20250910+cu130-cp310-cp310-manylinux_2_28_aarch64.whl...OK
2025-09-10T10:10:41.9394460Z Renaming torch-2.10.0.dev20250910+cu130-cp310-cp310-linux_aarch64.whl wheel to torch-2.10.0.dev20250910+cu130-cp310-cp310-manylinux_2_28_aarch64.whl
```

Test plan, executed on a local file:
```
inflating: ubuntu/dist/tmp/torch-2.9.0.dev20250909+cu130.dist-info/WHEEL
inflating: ubuntu/dist/tmp/torch-2.9.0.dev20250909+cu130.dist-info/entry_points.txt
inflating: ubuntu/dist/tmp/torch-2.9.0.dev20250909+cu130.dist-info/top_level.txt
inflating: ubuntu/dist/tmp/torch-2.9.0.dev20250909+cu130.dist-info/RECORD
Bundling CUDA libraries with wheel
Updated tag from Tag: cp310-cp310-manylinux_2_28_aarch64
to Tag: cp310-cp310-manylinux_2_28_aarch64
Repacking wheel as ubuntu/dist/torch-2.9.0.dev20250909+cu130-cp310-cp310-manylinux_2_28_aarch64.whl...OK
Copying torch-2.9.0.dev20250909+cu130-cp310-cp310-manylinux_2_28_aarch64.whl to artifacts
Build
Complete. Created torch-2.9.0.dev20250909+cu130-cp310-cp310-manylinux_2_28_aarch64.whl.. ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162566 Approved by: https://github.com/jeanschmidt, https://github.com/NicolasHug --- .ci/aarch64_linux/aarch64_wheel_ci_build.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py index e63ce012652aa..758f1d1c0adad 100755 --- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py +++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py @@ -138,6 +138,8 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: folder = os.path.dirname(wheel_path) os.mkdir(f"{folder}/tmp") os.system(f"unzip {wheel_path} -d {folder}/tmp") + # Delete original wheel since it will be repackaged + os.system(f"rm {wheel_path}") # Check if we should use PyPI NVIDIA libraries or bundle system libraries use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1" @@ -275,14 +277,7 @@ def complete_wheel(folder: str) -> str: f"/{folder}/dist/{repaired_wheel_name}", ) else: - repaired_wheel_name = wheel_name.replace( - "linux_aarch64", "manylinux_2_28_aarch64" - ) - print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}") - os.rename( - f"/{folder}/dist/{wheel_name}", - f"/{folder}/dist/{repaired_wheel_name}", - ) + repaired_wheel_name = list_dir(f"/{folder}/dist")[0] print(f"Copying {repaired_wheel_name} to artifacts") shutil.copy2( From fefc406a3d0d90db0f808419fb88045f90b213cd Mon Sep 17 00:00:00 2001 From: Masaki Kozuki Date: Wed, 10 Sep 2025 14:43:53 +0000 Subject: [PATCH 046/693] fix typo: summit -> submit (#162587) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162587 Approved by: https://github.com/justinchuby --- torch/onnx/_internal/exporter/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/onnx/_internal/exporter/_core.py b/torch/onnx/_internal/exporter/_core.py index 85aa513c6d023..33a19d629388d 100644 --- a/torch/onnx/_internal/exporter/_core.py +++ b/torch/onnx/_internal/exporter/_core.py @@ -79,7 +79,7 @@ f"""\ Failed to export the model with torch.export. {_BLUE}This is step 1/3{_END} of exporting the model to ONNX. Next steps: - Modify the model code for `torch.export.export` to succeed. Refer to https://pytorch.org/docs/stable/generated/exportdb/index.html for more information. - - Debug `torch.export.export` and summit a PR to PyTorch. + - Debug `torch.export.export` and submit a PR to PyTorch. 
- Create an issue in the PyTorch GitHub repository against the {_BLUE}*torch.export*{_END} component and attach the full error stack as well as reproduction scripts.""" ) From b5e6e58050bd2a15f4173cfffa00c7e32e382b49 Mon Sep 17 00:00:00 2001 From: Benjamin Glass Date: Mon, 8 Sep 2025 20:55:19 +0000 Subject: [PATCH 047/693] [nn] Assert parsed iterable arguments are an appropriate length (#162340) Fixes #162327 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162340 Approved by: https://github.com/Skylion007 --- test/nn/test_pooling.py | 2 +- test/quantization/core/test_quantized_op.py | 20 +++++++++++--------- test/test_mps.py | 6 +++--- test/test_nn.py | 10 ++-------- torch/nn/modules/conv.py | 2 +- torch/nn/modules/utils.py | 15 +++++++++++++-- 6 files changed, 31 insertions(+), 24 deletions(-) diff --git a/test/nn/test_pooling.py b/test/nn/test_pooling.py index a8f77df22d311..2e85f2da22683 100644 --- a/test/nn/test_pooling.py +++ b/test/nn/test_pooling.py @@ -481,7 +481,7 @@ def test_max_unpool(self): def test_max_unpool3d_input_check(self): x = torch.ones(1, 3, 1, 1, 1) - with self.assertRaises(RuntimeError): + with self.assertRaises(AssertionError): F.max_unpool3d(x, torch.zeros(x.shape, dtype=int), [1, 1]) def test_quantized_max_pool1d_empty_kernel(self): diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py index b6df2089e87e7..6b362bef365e6 100644 --- a/test/quantization/core/test_quantized_op.py +++ b/test/quantization/core/test_quantized_op.py @@ -15,7 +15,7 @@ from torch import _VF import torch.jit import torch.nn.functional as F -from torch.nn.modules.utils import _single, _pair +from torch.nn.modules.utils import _ntuple, _pair, _single from hypothesis import settings, HealthCheck from hypothesis import assume, given, note @@ -5311,10 +5311,11 @@ def _make_qconv_tensors( input_channels = input_channels_per_group * groups output_channels = output_channels_per_group * groups # Padded input size should be at least as big as dilated kernel - kernels = _single(kernels) - strides = _single(strides) - pads = _single(pads) - dilations = _single(dilations) + input_dimension_function = _ntuple(len(input_feature_map_shape)) + kernels = input_dimension_function(kernels) + strides = input_dimension_function(strides) + pads = input_dimension_function(pads) + dilations = input_dimension_function(dilations) for i in range(len(kernels)): assume(input_feature_map_shape[i] + 2 * pads[i] >= dilations[i] * (kernels[i] - 1) + 1) @@ -7846,10 +7847,11 @@ def _make_qconv_tensors_fp8( input_channels = input_channels_per_group * groups output_channels = output_channels_per_group * groups # Padded input size should be at least as big as dilated kernel - kernels = _single(kernels) - strides = _single(strides) - pads = _single(pads) - dilations = _single(dilations) + input_dimension_function = _ntuple(len(input_feature_map_shape)) + kernels = input_dimension_function(kernels) + strides = input_dimension_function(strides) + pads = input_dimension_function(pads) + dilations = input_dimension_function(dilations) for i in range(len(kernels)): assume(input_feature_map_shape[i] + 2 * pads[i] >= dilations[i] * (kernels[i] - 1) + 1) diff --git a/test/test_mps.py b/test/test_mps.py index 756b2cd20567a..c172c8c119b2b 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ -8957,9 +8957,9 @@ def helper(shape, padding, op, value=0): # pad dims == input dims helper((1, 3), (0, 2, 0, 1), nn.ConstantPad2d) # input.numel() == 0 but output.numel() > 0 - 
helper((0, 3, 3), (1, 1, 1, 1, 1, 1), nn.ConstantPad2d) + helper((0, 3, 3), 1, nn.ConstantPad2d) # pad dims < input dims - 2 - helper((1, 2, 3, 4), (1, 2), nn.ConstantPad2d) + helper((1, 2, 3, 4, 5), (1, 2, 0, 0), nn.ConstantPad2d) # 3D Padding helper((2, 4, 6, 8, 4), (1, 3, 3, 5, 3, 4), nn.ReflectionPad3d) @@ -8972,7 +8972,7 @@ def helper(shape, padding, op, value=0): # input size < pad size helper((2, 4, 6), (1, 3, 3, 5, 3, 4), nn.ConstantPad3d) # check the workaround for the right padding bug in Monterey - helper((1, 2, 2, 2, 2), (0, 1), nn.ConstantPad3d) + helper((1, 2, 2, 2, 2), (0, 1, 0, 1, 0, 1), nn.ConstantPad3d) def test_constant_pad_nd_preserves_memory_format(self): nchw_tensor = torch.rand((1, 2, 5, 3)) diff --git a/test/test_nn.py b/test/test_nn.py index c17f7cb668b6f..13ee5c2e2a420 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -7466,14 +7466,8 @@ def test_padding_list(self): def test_fractional_max_pool2d_invalid_output_ratio(self): arg_1 = [2, 1] arg_2 = [0.5, 0.5, 0.6] - arg_class = torch.nn.FractionalMaxPool2d(kernel_size=arg_1, output_ratio=arg_2,) - arg_3_0_tensor = torch.rand([20, 16, 50, 32], dtype=torch.float32) - arg_3_0 = arg_3_0_tensor.clone() - arg_3 = [arg_3_0,] - - with self.assertRaisesRegex(ValueError, - "fractional_max_pool2d requires output_ratio to either be a single Int or tuple of Ints."): - res = arg_class(*arg_3) + with self.assertRaisesRegex(AssertionError, "Expected an iterable of length 2, but got length 3"): + arg_class = torch.nn.FractionalMaxPool2d(kernel_size=arg_1, output_ratio=arg_2,) def test_max_pool1d_invalid_output_size(self): arg_1 = 3 diff --git a/torch/nn/modules/conv.py b/torch/nn/modules/conv.py index 2f15c3d488f72..ffb6f21e67145 100644 --- a/torch/nn/modules/conv.py +++ b/torch/nn/modules/conv.py @@ -768,7 +768,7 @@ def _output_padding( dilation: Optional[list[int]] = None, ) -> list[int]: if output_size is None: - ret = _single(self.output_padding) # converting to list if was not already + ret = list(self.output_padding) # converting to list if was not already else: has_batch_dim = input.dim() == num_spatial_dims + 2 num_non_spatial_dims = 2 if has_batch_dim else 1 diff --git a/torch/nn/modules/utils.py b/torch/nn/modules/utils.py index 220b8f206b195..492556dab01e6 100644 --- a/torch/nn/modules/utils.py +++ b/torch/nn/modules/utils.py @@ -1,5 +1,5 @@ # mypy: allow-untyped-defs -import collections +import collections.abc from itertools import repeat from typing import Any @@ -10,7 +10,18 @@ def _ntuple(n, name="parse"): def parse(x): if isinstance(x, collections.abc.Iterable): - return tuple(x) + ret = tuple(x) + + # If the iterable is length 1, automatically expand to fill. This + # matches the behavior of expand_param_if_needed. + if len(ret) == 1: + return tuple(repeat(ret[0], n)) + + # Otherwise assert the correct length. + assert len(ret) == n, ( + f"Expected an iterable of length {n}, but got length {len(ret)}" + ) + return ret return tuple(repeat(x, n)) parse.__name__ = name From 582d278983b28a91ac0cedd035183f2495bb6887 Mon Sep 17 00:00:00 2001 From: Robert Hardwick Date: Tue, 9 Sep 2025 22:27:08 +0000 Subject: [PATCH 048/693] Build and Install Arm Compute Library in manylinux docker image (#159737) ---- This PR will be part of a series of PR's that aims to remove `.ci/aarch64_linux` folder entirely, such that Aarch64 manylinux build happens as part of `.ci/manywheel/build.sh`, the same as other platforms. 
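To make the "prebuild ACL in the Docker image" step concrete, here is a rough sketch of what the image-build step is expected to do, based on the clone/scons/copy logic of the removed build_ArmComputeLibrary() helper in the diff below. The variable names and layout here are illustrative assumptions; the authoritative implementation is .ci/docker/common/install_acl.sh in this patch and may differ in detail.
```
# Hedged sketch only (bash). Flags and the v25.02 pin are taken from the
# removed build_ArmComputeLibrary() helper shown further down in this patch.
set -euxo pipefail

ACL_VERSION=${ACL_VERSION:-v25.02}   # overridable, as described in the bullets below
ACL_INSTALL_DIR=/acl

# Clone and build ACL once, while the manylinux image is being created,
# instead of once per PyTorch build.
git clone https://github.com/ARM-software/ComputeLibrary.git \
    -b "${ACL_VERSION}" --depth 1 --shallow-submodules
(
    cd ComputeLibrary
    scons Werror=1 -j"$(nproc)" debug=0 neon=1 opencl=0 os=linux openmp=1 \
        cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native
)

# Stage the build under /acl so later PyTorch builds only need ACL_ROOT_DIR=/acl.
mkdir -p "${ACL_INSTALL_DIR}"
for d in arm_compute include utils support src build; do
    cp -r "ComputeLibrary/${d}" "${ACL_INSTALL_DIR}/${d}"
done
```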
In this PR:
- We prebuild + install Arm Compute Library in the manylinux docker image ( at /acl ), instead of at build time for every PyTorch build. Also updated the jammy install path to /acl.
- We can therefore remove the build_ArmComputeLibrary functions from the CI build scripts.
- There is also some refactoring of install_openblas.sh and install_acl.sh to align them ( similar formatting, similar variable names, same place for the version number update ).
- We previously had 2 places defining the OpenBLAS version; this has been reduced to 1 ( install_openblas.sh ).
- ACL_VERSION and OPENBLAS_VERSION can now be overridden at the build.sh level by developers, but only 1 version of each is hardcoded for CI.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/159737
Approved by: https://github.com/seemethere
ghstack dependencies: #160078
---
.ci/aarch64_linux/aarch64_wheel_ci_build.py | 55 ++------------
.ci/aarch64_linux/build_aarch64_wheel.py | 48 ++++-------------
.ci/docker/common/install_acl.sh | 27 +++++++---
.ci/docker/common/install_openblas.sh | 12 +++--
.ci/docker/manywheel/Dockerfile_2_28_aarch64 | 10 +++-
.ci/docker/manywheel/build.sh | 5 +-
.ci/pytorch/build.sh | 2 +-
7 files changed, 52 insertions(+), 107 deletions(-)
mode change 100644 => 100755 .ci/docker/common/install_acl.sh
mode change 100644 => 100755 .ci/docker/common/install_openblas.sh
diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
index 758f1d1c0adad..dc75516fe1294 100755
--- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
@@ -13,49 +13,6 @@ def list_dir(path: str) -> list[str]: return check_output(["ls", "-1", path]).decode().split("\n") -def build_ArmComputeLibrary() -> None: - """ - Using ArmComputeLibrary for aarch64 PyTorch - """ - print("Building Arm Compute Library") - acl_build_flags = [ - "debug=0", - "neon=1", - "opencl=0", - "os=linux", - "openmp=1", - "cppthreads=0", - "arch=armv8a", - "multi_isa=1", - "fixed_format_kernels=1", - "build=native", - ] - acl_install_dir = "/acl" - acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary") - if os.path.isdir(acl_install_dir): - shutil.rmtree(acl_install_dir) - if not os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)): - check_call( - [ - "git", - "clone", - "https://github.com/ARM-software/ComputeLibrary.git", - "-b", - "v25.02", - "--depth", - "1", - "--shallow-submodules", - ] - ) - - check_call( - ["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags, - cwd=acl_checkout_dir, - ) - for d in ["arm_compute", "include", "utils", "support", "src", "build"]: - shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") - - def replace_tag(filename) -> None: with open(filename) as f: lines = f.readlines()
@@ -353,19 +310,13 @@ def parse_arguments(): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " if enable_mkldnn: - build_ArmComputeLibrary() print("build pytorch with mkldnn+acl backend") - build_vars += ( - "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " - "ACL_ROOT_DIR=/acl " - "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " - "ACL_INCLUDE_DIR=/acl/build " - "ACL_LIBRARY=/acl/build " - ) + build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " + build_vars += "ACL_ROOT_DIR=/acl " if enable_cuda: build_vars += "BLAS=NVPL " else: - build_vars += "BLAS=OpenBLAS 
OpenBLAS_HOME=/opt/OpenBLAS " else: print("build pytorch without mkldnn backend") diff --git a/.ci/aarch64_linux/build_aarch64_wheel.py b/.ci/aarch64_linux/build_aarch64_wheel.py index 7a4715d330060..dca0427a45d77 100755 --- a/.ci/aarch64_linux/build_aarch64_wheel.py +++ b/.ci/aarch64_linux/build_aarch64_wheel.py @@ -299,40 +299,6 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: ) -def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: - print("Building OpenBLAS") - host.run_cmd( - f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.28 {git_clone_flags}" - ) - make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" - host.run_cmd( - f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS" - ) - - -def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: - print("Building Arm Compute Library") - acl_build_flags = " ".join( - [ - "debug=0", - "neon=1", - "opencl=0", - "os=linux", - "openmp=1", - "cppthreads=0", - "arch=armv8a", - "multi_isa=1", - "fixed_format_kernels=1", - "build=native", - ] - ) - host.run_cmd( - f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}" - ) - - host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") - - def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None: host.run_cmd("pip3 install auditwheel") host.run_cmd( @@ -700,7 +666,6 @@ def start_build( configure_system( host, compiler=compiler, use_conda=use_conda, python_version=python_version ) - build_OpenBLAS(host, git_clone_flags) if host.using_docker(): print("Move libgfortant.a into a standard location") @@ -723,6 +688,8 @@ def start_build( f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}" ) + host.run_cmd("pytorch/.ci/docker/common/install_openblas.sh") + print("Building PyTorch wheel") build_opts = "" if pytorch_build_number is not None: @@ -743,15 +710,18 @@ def start_build( if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" if enable_mkldnn: - build_ArmComputeLibrary(host, git_clone_flags) + host.run_cmd("pytorch/.ci/docker/common/install_acl.sh") print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" + build_vars += " BLAS=OpenBLAS" + build_vars += " OpenBLAS_HOME=/opt/OpenBLAS" + build_vars += " ACL_ROOT_DIR=/acl" host.run_cmd( - f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}" + f"cd $HOME/pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}" ) print("Repair the wheel") pytorch_wheel_name = host.list_dir("pytorch/dist")[0] - ld_library_path = "$HOME/acl/build:$HOME/pytorch/build/lib" + ld_library_path = "/acl/build:$HOME/pytorch/build/lib" host.run_cmd( f"export LD_LIBRARY_PATH={ld_library_path} && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}" ) @@ -907,7 +877,7 @@ def terminate_instances(instance_type: str) -> None: def parse_arguments(): from argparse import ArgumentParser - parser = ArgumentParser("Builid and test AARCH64 wheels using EC2") + parser = ArgumentParser("Build and test AARCH64 wheels using EC2") parser.add_argument("--key-name", type=str) parser.add_argument("--debug", action="store_true") parser.add_argument("--build-only", action="store_true") diff --git a/.ci/docker/common/install_acl.sh b/.ci/docker/common/install_acl.sh old mode 100644 
new mode 100755 index bf41a03b28063..0b865e5bc6f8d --- a/.ci/docker/common/install_acl.sh +++ b/.ci/docker/common/install_acl.sh @@ -1,16 +1,27 @@ -set -euo pipefail +#!/bin/bash +# Script used only in CD pipeline -readonly version=v25.02 -readonly src_host=https://github.com/ARM-software -readonly src_repo=ComputeLibrary +set -eux -# Clone ACL -[[ ! -d ${src_repo} ]] && git clone ${src_host}/${src_repo}.git -cd ${src_repo} +ACL_VERSION=${ACL_VERSION:-"v25.02"} +ACL_INSTALL_DIR="/acl" -git checkout $version +# Clone ACL +git clone https://github.com/ARM-software/ComputeLibrary.git -b "${ACL_VERSION}" --depth 1 --shallow-submodules +ACL_CHECKOUT_DIR="ComputeLibrary" # Build with scons +pushd $ACL_CHECKOUT_DIR scons -j8 Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0 \ os=linux arch=armv8a build=native multi_isa=1 \ fixed_format_kernels=1 openmp=1 cppthreads=0 +popd + +# Install ACL +sudo mkdir -p ${ACL_INSTALL_DIR} +for d in arm_compute include utils support src build +do + sudo cp -r ${ACL_CHECKOUT_DIR}/${d} ${ACL_INSTALL_DIR}/${d} +done + +rm -rf $ACL_CHECKOUT_DIR \ No newline at end of file diff --git a/.ci/docker/common/install_openblas.sh b/.ci/docker/common/install_openblas.sh old mode 100644 new mode 100755 index 3c795acf2220b..2f386c6bd523a --- a/.ci/docker/common/install_openblas.sh +++ b/.ci/docker/common/install_openblas.sh @@ -3,8 +3,10 @@ set -ex -cd / -git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION:-v0.3.30}" --depth 1 --shallow-submodules +OPENBLAS_VERSION=${OPENBLAS_VERSION:-"v0.3.30"} + +# Clone OpenBLAS +git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION}" --depth 1 --shallow-submodules OPENBLAS_CHECKOUT_DIR="OpenBLAS" OPENBLAS_BUILD_FLAGS=" @@ -17,5 +19,7 @@ CFLAGS=-O3 BUILD_BFLOAT16=1 " -make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR} -make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR} +make -j8 ${OPENBLAS_BUILD_FLAGS} -C $OPENBLAS_CHECKOUT_DIR +sudo make install -C $OPENBLAS_CHECKOUT_DIR + +rm -rf $OPENBLAS_CHECKOUT_DIR \ No newline at end of file diff --git a/.ci/docker/manywheel/Dockerfile_2_28_aarch64 b/.ci/docker/manywheel/Dockerfile_2_28_aarch64 index da7ab4d3fd154..5ff4d98e51aa6 100644 --- a/.ci/docker/manywheel/Dockerfile_2_28_aarch64 +++ b/.ci/docker/manywheel/Dockerfile_2_28_aarch64 @@ -62,6 +62,13 @@ ARG OPENBLAS_VERSION ADD ./common/install_openblas.sh install_openblas.sh RUN bash ./install_openblas.sh && rm install_openblas.sh +# Install Arm Compute Library +FROM base as arm_compute +# use python3.9 to install scons +RUN python3.9 -m pip install scons==4.7.0 +RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin +COPY ./common/install_acl.sh install_acl.sh +RUN bash ./install_acl.sh && rm install_acl.sh FROM base as final # remove unnecessary python versions @@ -70,4 +77,5 @@ RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/ -ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH +COPY --from=arm_compute /acl /acl +ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:/acl/build/:$LD_LIBRARY_PATH \ No newline at end of file diff --git a/.ci/docker/manywheel/build.sh b/.ci/docker/manywheel/build.sh index 5dee4325857fb..ea4be0128eb00 100755 --- a/.ci/docker/manywheel/build.sh +++ b/.ci/docker/manywheel/build.sh @@ -28,6 +28,7 @@ fi MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-} 
DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-} OPENBLAS_VERSION=${OPENBLAS_VERSION:-} +ACL_VERSION=${ACL_VERSION:-} case ${image} in manylinux2_28-builder:cpu) @@ -41,7 +42,6 @@ case ${image} in GPU_IMAGE=arm64v8/almalinux:8 DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13 --build-arg NINJA_VERSION=1.12.1" MANY_LINUX_VERSION="2_28_aarch64" - OPENBLAS_VERSION="v0.3.30" ;; manylinuxcxx11-abi-builder:cpu-cxx11-abi) TARGET=final @@ -121,7 +121,8 @@ tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]') DOCKER_BUILDKIT=1 docker build \ ${DOCKER_GPU_BUILD_ARG} \ --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ - --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION}" \ + --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION:-}" \ + --build-arg "ACL_VERSION=${ACL_VERSION:-}" \ --target "${TARGET}" \ -t "${tmp_tag}" \ $@ \ diff --git a/.ci/pytorch/build.sh b/.ci/pytorch/build.sh index 1c88554c2af96..b3601b17b7af0 100755 --- a/.ci/pytorch/build.sh +++ b/.ci/pytorch/build.sh @@ -89,7 +89,7 @@ fi if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then export USE_MKLDNN=1 export USE_MKLDNN_ACL=1 - export ACL_ROOT_DIR=/ComputeLibrary + export ACL_ROOT_DIR=/acl fi if [[ "$BUILD_ENVIRONMENT" == *riscv64* ]]; then From 2dc26131801a430e030a773c4fbfe874e263259d Mon Sep 17 00:00:00 2001 From: suo Date: Wed, 10 Sep 2025 16:59:18 +0000 Subject: [PATCH 049/693] [torch][c10d] fix split_group in mixed backend case (#162424) Today we can initialize a mixed-backend process group (e.g. "cpu:gloo,cuda:nccl") but we can only pass one set of process group options. However, when we call `split_group`, we retrieve that set of options from the parent PG and pass it to the ProcessGroup::groupSplit C++ API, which then attempts to propagate that set of options to all backends. This leads to an assert on some user code, where ProcessGroupGloo::split is expecting gloo options but receives nccl options instead. Arguably the APIs as currently designed are just broken; we should not ever expect a single set of backend options to apply across multiple backends. However, fixing this would require changing quite a few public APIs. As a quick fix, since user-provided options really only exist for NCCL, just warn and fall-back to defaulted options for Gloo if non-gloo options are detected. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162424 Approved by: https://github.com/d4l3k, https://github.com/fduwjj, https://github.com/H-Huang --- test/distributed/test_c10d_nccl.py | 56 +++++++++++++++++++ torch/csrc/distributed/c10d/ProcessGroup.hpp | 4 +- .../distributed/c10d/ProcessGroupGloo.cpp | 40 ++++++++++++- .../distributed/c10d/ProcessGroupGloo.hpp | 3 + torch/csrc/distributed/c10d/init.cpp | 28 +--------- torch/distributed/distributed_c10d.py | 6 +- 6 files changed, 107 insertions(+), 30 deletions(-) diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index b234c907a6658..0d55845228da7 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -1087,6 +1087,62 @@ def test_comm_split_group(self): dist.destroy_process_group() + @requires_nccl_version((2, 18), "Need NCCL 2.18+ for ncclCommSplit") + @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") + def test_comm_split_group_mixed_backend(self): + # Test `ncclCommSplit` for smaller subgroups of the world when + # we've passed a specific device_id to init_process_group. 
+ store = c10d.FileStore(self.file_name, self.world_size) + device = torch.device(f"cuda:{self.rank}") + # pg = self._create_process_group_nccl(store, self.opts(), device_id=device) + # create nccl processgroup with opts + c10d.init_process_group( + "cpu:gloo,cuda:nccl", + world_size=self.world_size, + rank=self.rank, + store=store, + pg_options=self.opts(), + device_id=device, + ) + pg = c10d.distributed_c10d._get_default_group() + backend = pg._get_backend(torch.device(device)) + + cuda_tensor = torch.full((1,), self.rank).cuda(device) + cpu_tensor = torch.full((1,), self.rank) + # Create subgroup between ranks 0, 1 + subg_ranks = [0, 1] + ng1 = c10d.split_group(pg, [subg_ranks]) + backend1 = ng1._get_backend(torch.device(device)) + + # check basic options are the same between parent and child + self.assertEqual(backend.options._timeout, backend1.options._timeout) + self.assertEqual( + backend.options.is_high_priority_stream, + backend1.options.is_high_priority_stream, + ) + self.assertEqual(ng1.group_desc, "default_pg:split:0") + + # comm split happens eagerly since device_id is passed to init_process_group. + self.assertEqual(backend.comm_split_count(), 1) + # dist.get_process_group_ranks returns the global ranks in the subgroup. + self.assertEqual( + dist.get_process_group_ranks(ng1), + subg_ranks if self.rank in subg_ranks else [], + ) + + # is part of ng1; otherwise, -1 + if dist.get_rank(ng1) >= 0: + dist.broadcast(cuda_tensor, dist.get_global_rank(ng1, 0), group=ng1) + self.assertEqual(cuda_tensor, torch.full((1,), 0)) + dist.broadcast(cpu_tensor, dist.get_global_rank(ng1, 0), group=ng1) + self.assertEqual(cpu_tensor, torch.full((1,), 0)) + + ng2 = c10d.split_group(pg, [subg_ranks]) + self.assertEqual(ng2.group_desc, "default_pg:split:1") + self.assertEqual(backend.comm_split_count(), 2) + + dist.destroy_process_group() + @requires_nccl_version((2, 18), "Need NCCL 2.18+ for ncclCommSplit") @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_non_blocking_init(self): diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp index 4fb2d566e9a76..5a06a386d5ca8 100644 --- a/torch/csrc/distributed/c10d/ProcessGroup.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp @@ -1015,7 +1015,9 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder { // Backend classes for this ProcessGroup std::unordered_set deviceTypes_; - std::unordered_map deviceTypeToBackendType_; + // This mapping is ordered, as splitGroup must call split on the underlying + // backends in a consistent order. + std::map deviceTypeToBackendType_; std::unordered_map> deviceTypeToBackend_; std::unordered_map> diff --git a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp index fbd8a403b97dc..74063ff579e80 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp +++ b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp @@ -551,6 +551,32 @@ std::shared_ptr<::gloo::transport::Device> ProcessGroupGloo:: static std::atomic process_group_id = 0; +c10::intrusive_ptr ProcessGroupGloo::Options:: + create_default(std::chrono::milliseconds timeout) { + auto options = ::c10d::ProcessGroupGloo::Options::create(); + bool lazyInit = ::c10d::getDefaultGlooLazyInit(); + + // Use interfaces listed in "GLOO_SOCKET_IFNAME", if set. 
+ auto ifnameEnv = c10::utils::get_env("GLOO_SOCKET_IFNAME"); + if (ifnameEnv && ifnameEnv->size() > 1) { + for (const auto& iface : ::c10d::split(',', ifnameEnv->c_str())) { + options->devices.push_back( + ::c10d::ProcessGroupGloo::createDeviceForInterface(iface, lazyInit)); + } + } else { + // If no hostname is specified, this function looks up + // the machine's hostname and returns a device instance + // associated with the address that the hostname resolves to. + options->devices.push_back( + ::c10d::ProcessGroupGloo::createDefaultDevice(lazyInit)); + } + + options->timeout = timeout; + // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + options->threads = options->devices.size() * 2; + return options; +} + ProcessGroupGloo::ProcessGroupGloo( const c10::intrusive_ptr& store, int rank, @@ -710,7 +736,12 @@ c10::intrusive_ptr ProcessGroupGloo::split( } auto glooOpts = c10::dynamic_intrusive_pointer_cast(opts); - TORCH_CHECK(glooOpts != nullptr, "opts not a ProcessGroupGloo::Options."); + if (glooOpts == nullptr) { + TORCH_WARN_ONCE( + "Tried to pass options to ProcessGroupGloo::split that are not ProcessGroupGloo::Options." + "Falling back to default options."); + glooOpts = ProcessGroupGloo::Options::create_default(); + } // TODO: we need to get rid of globalRanksInGroup eventually. std::vector globalRanksInGroup; @@ -729,7 +760,12 @@ c10::intrusive_ptr ProcessGroupGloo::merge( const int& rank, const int& size) { auto glooOpts = c10::dynamic_intrusive_pointer_cast(opts); - TORCH_CHECK(glooOpts != nullptr, "opts not a ProcessGroupGloo::Options."); + if (glooOpts == nullptr) { + TORCH_WARN_ONCE( + "Tried to pass options to ProcessGroupGloo::merge that are not ProcessGroupGloo::Options." + "Falling back to default options."); + glooOpts = ProcessGroupGloo::Options::create_default(); + } auto pg = c10::make_intrusive( store->clone(), rank, size, glooOpts); return c10::static_intrusive_pointer_cast(pg); diff --git a/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp b/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp index 4297807f2e8b9..b2cc6993528bf 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp @@ -255,6 +255,9 @@ class TORCH_API ProcessGroupGloo : public Backend { return c10::make_intrusive(timeout); } + static c10::intrusive_ptr create_default( + std::chrono::milliseconds timeout = kBackendDefaultTimeout); + std::vector> devices; int threads; }; diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index 0189326683585..c36b9025dfecc 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -3103,8 +3103,6 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`). .def_readwrite("group_name", &::c10d::Backend::Options::group_name); #ifdef USE_C10D_GLOO - static const std::string GLOO_SOCKET_IFNAME_ENV = "GLOO_SOCKET_IFNAME"; - auto processGroupGloo = intrusive_ptr_no_gil_destructor_class_<::c10d::ProcessGroupGloo>( module, "ProcessGroupGloo", backend); @@ -3181,31 +3179,9 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`). // https://github.com/pybind/pybind11/issues/5473 py::gil_scoped_release nogil{}; - auto options = ::c10d::ProcessGroupGloo::Options::create(); - bool lazyInit = ::c10d::getDefaultGlooLazyInit(); - - // Use interfaces listed in "GLOO_SOCKET_IFNAME", if set. 
- auto ifnameEnv = - c10::utils::get_env(GLOO_SOCKET_IFNAME_ENV.c_str()); - if (ifnameEnv && ifnameEnv->size() > 1) { - for (const auto& iface : ::c10d::split(',', ifnameEnv->c_str())) { - options->devices.push_back( - ::c10d::ProcessGroupGloo::createDeviceForInterface( - iface, lazyInit)); - } - } else { - // If no hostname is specified, this function looks up - // the machine's hostname and returns a device instance - // associated with the address that the hostname resolves to. - options->devices.push_back( - ::c10d::ProcessGroupGloo::createDefaultDevice(lazyInit)); - } - - options->timeout = timeout; - // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) - options->threads = options->devices.size() * 2; + auto options = ::c10d::ProcessGroupGloo::Options::create_default(); return c10::make_intrusive<::c10d::ProcessGroupGloo>( - store, rank, size, options); + store, rank, size, std::move(options)); }), py::arg("store"), py::arg("rank"), diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 14790e5dba8af..29609404df09b 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -5158,7 +5158,11 @@ def split_group( my_group = split_group break - group_name = _process_group_name(my_group, use_hashed_name=False) + # use_hashed_name is True to ensure that subgroups have unique names. + # This is needed as some backends (e.g. Gloo) use the group name as a + # PrefixStore prefix for initialization of splits. Thus, names have to be + # unique to avoid key collisions. + group_name = _process_group_name(my_group, use_hashed_name=True) split_pg = parent_pg.split_group( my_group, timeout=timeout, From 1051c7dbc221debddbea8e7842aebb7a95b990fc Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Wed, 10 Sep 2025 10:35:32 -0400 Subject: [PATCH 050/693] Don't unconditionally import torch._dynamo, it's slow (#162595) A trivial test on OS X. 
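The fix is the standard deferred-import pattern: the `torch._dynamo` import is moved from module scope into the single helper that needs it, so a plain `import torch.testing._internal.common_utils` no longer pays the dynamo import cost. A minimal sketch, simplified from the diff below:

```
# before: module-scope import, paid on every import of common_utils
# from torch._dynamo.trace_rules import _as_posix_path

def munge_exc(e, *, suppress_suffix=True, suppress_prefix=True, file=None, skip=0):
    # after: the import runs only when munge_exc() is actually called
    from torch._dynamo.trace_rules import _as_posix_path
    ...
```

The before/after timings that follow are from that trivial import test on OS X.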
Before: ``` real 0m6.550s user 0m2.532s sys 0m3.359s ``` After: ``` real 0m2.607s user 0m1.898s sys 0m3.344s ``` Signed-off-by: Edward Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162595 Approved by: https://github.com/albanD --- torch/testing/_internal/common_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index bfc568bc14645..e29f36020e9c2 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -68,7 +68,6 @@ import torch.cuda from torch import Tensor from torch._C import ScriptDict, ScriptList # type: ignore[attr-defined] -from torch._dynamo.trace_rules import _as_posix_path from torch._utils_internal import get_writable_path from torch._logging.scribe import open_source_signpost from torch.nn import ( @@ -5606,6 +5605,8 @@ class LazyVal: def munge_exc(e, *, suppress_suffix=True, suppress_prefix=True, file=None, skip=0): + from torch._dynamo.trace_rules import _as_posix_path + if file is None: file = inspect.stack()[1 + skip].filename # skip one frame From f08487aa8692751c36e608e338204490b0955583 Mon Sep 17 00:00:00 2001 From: Ruben Rodriguez Buchillon Date: Tue, 9 Sep 2025 23:00:11 -0700 Subject: [PATCH 051/693] [inductor] FlexibleLayout for ExternKernelChoice for mms (#161351) # why - if we only use ExternKernelChoice we're not doing any codegen - if we're not doing any codegen, we can use a FlexibleLayout here, and provide deeper passes more chances to change it # what - if all the kernel template choices (KTC) are with a ExternKernelChoice template, we switch to a FlexibleLayout before generating the choice - add a test to make sure that works as intended (FlexibleLayout for only extern, and FixedLayout if Triton is involved) - caveats: - because CPP, CUTLASS, and CK are not using V.choices.get_mm_configs yet, we turn off the optimization if either of those backends are in use. This will be relaxed once they support this too - because Triton templates are still using their own calls (not a single call) to get_mm_configs, it's also turned off there. 
The next diff unifies Triton + ATEN to a single call to get_mm_configs and that in turn allows the optimization there too # testing ``` python3 -bb -m pytest test/inductor/test_max_autotune.py -v ``` Differential Revision: [D81520584](https://our.internmc.facebook.com/intern/diff/D81520584) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161351 Approved by: https://github.com/eellison, https://github.com/jansel --- test/inductor/test_max_autotune.py | 38 ++++++++++++- torch/_inductor/choices.py | 86 ++++++++++++++++++++++-------- torch/_inductor/kernel/mm.py | 21 +------- 3 files changed, 102 insertions(+), 43 deletions(-) diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index 320bdf3462e64..aea205163d808 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -27,7 +27,7 @@ TuningProcessPool, ) from torch._inductor.graph import GraphLowering -from torch._inductor.ir import Buffer, ChoiceCaller, FixedLayout +from torch._inductor.ir import Buffer, ChoiceCaller, FixedLayout, FlexibleLayout from torch._inductor.kernel.mm_plus_mm import aten_mm_plus_mm from torch._inductor.select_algorithm import ( add_feedback_saver, @@ -1973,6 +1973,42 @@ def choice_validator(choices): finally: clear_preprocessing_fns() + @config.patch( + {"test_configs.max_mm_configs": 4, "max_autotune_gemm_backends": "ATEN,TRITON"} + ) + @parametrize("max_autotune_enabled", (True, False)) + def test_autotune_layout_optimization(self, max_autotune_enabled): + """Test that layouts are flexible when every choice is ExternKernelChoice""" + + # we use a proxy here of bias_addmm and max-autotune because this enables us to see + # multiple choices in both scenarios (bias_addmm, addmm, triton (max-autotune only)) + # and both bias_addmm and addmm are extern kernel choices + def layout_checker(choices): + if choices: + expected_layout = ( + FixedLayout if max_autotune_enabled else FlexibleLayout + ) + for choice in choices: + self.assertIsInstance( + choice.layout, + expected_layout, + f"Expected {expected_layout.__name__} with max_autotune={max_autotune_enabled}", + ) + return choices + + add_preprocessing_fn(layout_checker) + + try: + bias = torch.randn(64, device=GPU_TYPE) + x = torch.randn(32, 128, device=GPU_TYPE) + w = torch.randn(128, 64, device=GPU_TYPE) + + with config.patch({"max_autotune": max_autotune_enabled}): + compiled_fn = torch.compile(lambda b, x, w: torch.addmm(b, x, w)) + _ = compiled_fn(bias, x, w) + finally: + clear_preprocessing_fns(clear_defaults=False) + class TestMaxAutotunePrecompile(TestCase): def test_precompilation_threads(self): diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index a6275ac85c110..3db131e2584eb 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -14,6 +14,7 @@ from .metrics import get_metric_table, is_metric_table_enabled from .runtime.hints import DeviceProperties, ReductionHint from .scheduler import BaseSchedulerNode, Scheduler, WhyNoFuse +from .select_algorithm import ExternKernelChoice from .template_heuristics import get_template_heuristic from .template_heuristics.triton import ( BaseConfigHeuristic, @@ -23,6 +24,7 @@ ROCmConfigHeuristic, XPUConfigHeuristic, ) +from .utils import _use_autotune_backend from .virtualized import V @@ -32,14 +34,13 @@ from triton import Config as TritonConfig - from torch.utils._ordered_set import OrderedSet - from .codegen.common import KernelTemplate from .codegen.simd_kernel_features import SIMDKernelFeatures from 
.codegen.triton import TritonKernel - from .ir import ChoiceCaller, Layout + from .ir import ChoiceCaller from .kernel_template_choice import KernelTemplateChoice - from .select_algorithm import ExternKernelChoice + + from torch.utils._ordered_set import OrderedSet # isort: skip class Sortable(typing.Protocol): @@ -109,7 +110,6 @@ def _finalize_mm_configs( self, template_choices: dict[str, Generator[KernelTemplateChoice, None, None]], kernel_inputs: KernelInputs, - layout: Any, templates: list[Union[KernelTemplate, ExternKernelChoice]], op_name: str, kwarg_overrides: Optional[dict[str, dict[str, Any]]] = None, @@ -126,7 +126,6 @@ def _finalize_mm_configs( Args: template_choices: Dictionary mapping template UIDs to generators of KernelTemplateChoice objects kernel_inputs: MMKernelInputs containing input tensor nodes and matrix indices - layout: Output layout templates: List of template objects (KernelTemplate or ExternKernelChoice) in use op_name: Operation name (e.g., "bmm", "baddbmm", "addmm") kwarg_overrides: Optional dict of kwargs to override for each template heuristic @@ -142,7 +141,6 @@ def _finalize_mm_configs( def get_ktc( self, kernel_inputs: KernelInputs, - layout: Layout, template: Union[KernelTemplate, ExternKernelChoice], op_name: str, kwarg_overrides: Optional[dict[str, Any]] = None, @@ -176,16 +174,60 @@ def get_ktc( cs=cs, overrides=overrides, extra_kwargs=extra_kwargs, - layout=layout, + layout=kernel_inputs.output_layout(), inputs=inputs_val, ) + def _need_to_fix_layout( + self, + adjusted_choices: list[KernelTemplateChoice], + op_name: str, + ) -> bool: + """ + Check if we need to fix the layout instead of keeping it flexible + + Args: + ktc: KernelTemplateChoice object + + Returns: + True if we need to fix the layout, False otherwise + """ + # TODO: debug and fix + # NOTE: on mps, we see issues with flexible layouts on baddmm. This check just makes sure + # that for mps, everything stays as it was before this optimization + if len(adjusted_choices) > 0: + if adjusted_choices[0].inputs.device_type == "mps" and op_name not in [ + "mm", + "addmm", + ]: + return True + + # Since the following backends are not using get_mm_configs yet through the singular call, + if not (config.max_autotune or config.max_autotune_gemm): + # no danger of using other backends than ATEN + return False + + # Since the following backends are not using get_template_configs yet through the singular call, + # we don't know if they are a valid choice or not. Instead, just skip the optimization + # defensively. 
+ # TODO(coconutruben): remove this once TRITON,CPP,CK,CUTLASS are supported + if _use_autotune_backend("TRITON"): + return True + if _use_autotune_backend("CUTLASS"): + return True + if _use_autotune_backend("CK") or _use_autotune_backend("CKTILE"): + return True + if _use_autotune_backend("CPP"): + return True + return any( + not isinstance(ktc.template, ExternKernelChoice) for ktc in adjusted_choices + ) + def get_mm_configs( self, kernel_inputs: KernelInputs, templates: list[Union[KernelTemplate, ExternKernelChoice]], op_name: str, - layout: Optional[Layout] = None, kwarg_overrides: Optional[dict[str, dict[str, Any]]] = None, ) -> list[ChoiceCaller]: """ @@ -206,17 +248,12 @@ def get_mm_configs( input_tensors = kernel_inputs.nodes() if len(input_tensors) < 2: raise ValueError(f"Need at least 2 input tensors, got {len(input_tensors)}") - if layout is None: - # TODO(coconutruben): remove this once we remove the layout argument entirely - # This is just here to the brief gap between commits where we still need this - # to accommodate fixed vs flexible layout decision externally - layout = kernel_inputs.output_layout(flexible=False) + layout = kernel_inputs.output_layout() # First pass: Create dict of template.uid to generator of KernelTemplateChoice objects template_choices = {} for template in templates: template_choices[template.uid] = self.get_ktc( kernel_inputs, - layout, template, op_name, kwarg_overrides.get(template.uid, {}), @@ -226,18 +263,21 @@ def get_mm_configs( adjusted_choices = self._finalize_mm_configs( template_choices, kernel_inputs, - layout, templates, op_name, kwarg_overrides, ) - choices = [] - # Third pass: Get adjusted choices and collect non-None ChoiceCaller objects - for ktc in adjusted_choices: - if ktc.choice is not None: - choices.append(ktc.choice) - - return choices + # Layout optimization: if all choices are ExternKernelChoice and layout is FixedLayout, convert to FlexibleLayout + if self._need_to_fix_layout(adjusted_choices, op_name): + layout = kernel_inputs.output_layout(flexible=False) + for ktc in adjusted_choices: + ktc.layout = layout + # for good measure, delete the cached ChoiceCaller from the ktc if it existed. 
+ # ExternKernelChoice are cheap to generate + if hasattr(ktc, "_choice"): + del ktc._choice + # Third pass: Convert to ChoiceCaller objects + return [ktc.choice for ktc in adjusted_choices if ktc.choice is not None] def triton_kernel_kwargs( self, diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 784744dba9917..94eb609128c81 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -23,7 +23,7 @@ from ..codegen.rocm.ck_tile_universal_gemm_template import CKTileGemmTemplate from ..codegen.rocm.ck_universal_gemm_template import CKGemmTemplate from ..codegen.subgraph import SubgraphChoiceCaller, SubgraphTemplate -from ..ir import Buffer, ChoiceCaller, FlexibleLayout, is_triton, Layout +from ..ir import Buffer, ChoiceCaller, is_triton, Layout from ..kernel_inputs import MMKernelInputs from ..lowering import add_layout_constraint, constrain_to_fx_strides, register_lowering from ..select_algorithm import ( @@ -745,16 +745,9 @@ def tuned_mm(mat1, mat2, *, layout=None): layout, ) - aten_layout = layout - if not (inductor_config.max_autotune or inductor_config.max_autotune_gemm): - aten_layout = FlexibleLayout( - device=layout.device, dtype=layout.dtype, size=layout.size - ) choices: list[ChoiceCaller] = [] if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm", aten_layout) - ) + choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm")) static_shape, is_nonzero = _is_static_problem(layout) if is_nonzero and use_triton_template(layout, check_max_autotune=False): @@ -942,18 +935,9 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): mat2.get_dtype(), layout, ) - aten_layout = layout if (not is_nonzero) or ( not (inductor_config.max_autotune or inductor_config.max_autotune_gemm) ): - # Use a FlexibleLayout if we are not autotuning. - # This allows padding strides for the output. 
- from torch._inductor.ir import FixedLayout, FlexibleLayout - - if isinstance(layout, FixedLayout): - aten_layout = FlexibleLayout( - device=layout.device, dtype=layout.dtype, size=layout.size - ) # TODO(coconutruben): combine this with the main flow of addmm through # a subgraph or something as inp vs inp_expanded causes some slight numeric # differences @@ -965,7 +949,6 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): kernel_inputs, [aten_addmm], name, - aten_layout, ) ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) From 623e623c821f639559248e9acd6084311c8fd3d5 Mon Sep 17 00:00:00 2001 From: Ruben Rodriguez Buchillon Date: Tue, 9 Sep 2025 23:00:11 -0700 Subject: [PATCH 052/693] [inductor] leverage template stacking in V.choices.get_mm_configs (#161350) # why - now everything is in place to just gather templates and run the V.choices.get_mm_configs once per op - enables any overrides inside V.choices.get_mm_configs to have a full view of the options for an op, not just for one template # what - replace multiple calls to V.choices.get_mm_configs with calls to gather the active templates, and then using those in a single call # testing ``` python3 -bb -m pytest test/inductor/test_max_autotune.py -v ``` Differential Revision: [D81520571](https://our.internmc.facebook.com/intern/diff/D81520571) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161350 Approved by: https://github.com/eellison, https://github.com/jansel ghstack dependencies: #161351 --- torch/_inductor/choices.py | 4 +- torch/_inductor/kernel/bmm.py | 47 ++++--- torch/_inductor/kernel/mm.py | 175 +++++++++++---------------- torch/_inductor/kernel/mm_plus_mm.py | 20 +-- 4 files changed, 110 insertions(+), 136 deletions(-) diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index 3db131e2584eb..fa70c6a53358e 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -210,9 +210,7 @@ def _need_to_fix_layout( # Since the following backends are not using get_template_configs yet through the singular call, # we don't know if they are a valid choice or not. Instead, just skip the optimization # defensively. 
- # TODO(coconutruben): remove this once TRITON,CPP,CK,CUTLASS are supported - if _use_autotune_backend("TRITON"): - return True + # TODO(coconutruben): remove this once CPP,CK,CUTLASS are supported if _use_autotune_backend("CUTLASS"): return True if _use_autotune_backend("CK") or _use_autotune_backend("CKTILE"): diff --git a/torch/_inductor/kernel/bmm.py b/torch/_inductor/kernel/bmm.py index 6c468e7da0280..ea81799690f79 100644 --- a/torch/_inductor/kernel/bmm.py +++ b/torch/_inductor/kernel/bmm.py @@ -1,6 +1,6 @@ # mypy: allow-untyped-defs import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Union import torch from torch._dynamo.utils import counters @@ -28,6 +28,7 @@ if TYPE_CHECKING: from ..ir import ChoiceCaller + from ..select_algorithm import KernelTemplate log = logging.getLogger(__name__) aten = torch.ops.aten @@ -197,21 +198,29 @@ def may_require_contiguous(t, meta_t): aten_extra_kwargs = {"out_dtype": out_dtype} choices: list[ChoiceCaller] = [] + + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] + kwarg_overrides = {} + if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten_handler], - name, - kwarg_overrides={aten_handler.uid: aten_extra_kwargs}, - ) - ) + templates_to_use.append(aten_handler) + kwarg_overrides[aten_handler.uid] = aten_extra_kwargs if use_triton_template(layout, check_max_autotune=False): # TODO: add out_dtype support for Triton Template assert out_dtype is None, "out_dtype is not supported for Triton" - - choices.extend(V.choices.get_mm_configs(kernel_inputs, [bmm_template], name)) + templates_to_use.append(bmm_template) + + # Single unified call for all templates + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + templates_to_use, + name, + kwarg_overrides=kwarg_overrides, + ) + ) _, is_nonzero = _is_static_problem(layout) batch_stride_largest_or_zero = is_batch_stride_largest_or_zero(mat1, mat2, layout) if ( @@ -271,16 +280,16 @@ def tuned_baddbmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): name = "baddbmm" # options to tune from choices: list[ChoiceCaller] = [] + + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_baddbmm], name)) + templates_to_use.append(aten_baddbmm) if use_triton_template(layout, check_max_autotune=False): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [bmm_template], - name, - ) - ) + templates_to_use.append(bmm_template) + + # Single unified call for all templates + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 94eb609128c81..c6219fa25cfc3 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import functools import logging -from typing import Any, Optional +from typing import Any, Optional, Union import torch from torch._dynamo.utils import counters @@ -29,6 +29,7 @@ from ..select_algorithm import ( autotune_select_algorithm, ExternKernelChoice, + KernelTemplate, realize_inputs, TritonTemplate, ) @@ -746,32 +747,26 @@ def tuned_mm(mat1, mat2, *, layout=None): ) choices: list[ChoiceCaller] = [] - if use_aten_gemm_kernels(): - 
choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm")) static_shape, is_nonzero = _is_static_problem(layout) - if is_nonzero and use_triton_template(layout, check_max_autotune=False): - # Get template choices using the new unified function - choices.extend(V.choices.get_mm_configs(kernel_inputs, [mm_template], "mm")) + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] + if use_aten_gemm_kernels(): + templates_to_use.append(aten_mm) + + if is_nonzero and use_triton_template(layout, check_max_autotune=True): + templates_to_use.append(mm_template) + if use_triton_tma_template(mat1, mat2): - # Get TMA template choices using the new unified function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, [persistent_tma_mm_template], "mm" - ) - ) + templates_to_use.append(persistent_tma_mm_template) if use_decompose_k_choice(m, n, k): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, [decompose_k_subgraph_template], "mm" - ) - ) - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, [mm_contiguous_subgraph_template], "mm" - ) - ) + templates_to_use.append(decompose_k_subgraph_template) + + templates_to_use.append(mm_contiguous_subgraph_template) + + # Single unified call for all non-autoheuristic templates + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm")) if ( is_nonzero @@ -887,25 +882,25 @@ def tuned_int_mm(mat1, mat2, *, layout=None): # Create MMKernelInputs for Int MM kernel_inputs = MMKernelInputs([mat1, mat2], out_dtype=torch.int32) + + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten__int_mm], - name, - ) - ) + templates_to_use.append(aten__int_mm) + + if is_nonzero and use_triton_template( + layout, enable_int32=True, check_max_autotune=False + ): + templates_to_use.append(mm_template) + + # Single unified call for all templates + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) if use_cutlass and _use_cutlass_for_op(name): CUTLASS3xGemmTemplate.add_cutlass_gemm_choices( choices, layout, kernel_inputs.nodes(), fuseable=True, non_fuseable=True ) - if is_nonzero and use_triton_template( - layout, enable_int32=True, check_max_autotune=False - ): - choices.extend(V.choices.get_mm_configs(kernel_inputs, [mm_template], name)) - return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) @@ -953,50 +948,21 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten_bias_addmm], - name, - ) - ) - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten_addmm], - name, - ) - ) + templates_to_use.extend([aten_bias_addmm, aten_addmm]) if is_nonzero and use_triton_template(layout, check_max_autotune=False): - # all the triton templates use the extra_kwargs - # Get template choices using the new unified function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [mm_template], - name, - ) - ) + templates_to_use.append(mm_template) if use_triton_tma_template(mat1, mat2): - # Get TMA template choices using the new unified 
function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [persistent_tma_mm_template], - name, - ) - ) + templates_to_use.append(persistent_tma_mm_template) - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [addmm_contiguous_subgraph_template], - "addmm", - ) - ) + templates_to_use.append(addmm_contiguous_subgraph_template) + + # Single unified call for all templates + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) if ( is_nonzero @@ -1151,52 +1117,49 @@ def tuned_scaled_mm( ) choices: list[ChoiceCaller] = [] + + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] + kwarg_overrides = {} + if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten__fp8_mm], - name, - kwarg_overrides={ - aten__fp8_mm.uid: dict( - out_dtype=out_dtype, use_fast_accum=use_fast_accum - ) - }, - ) + templates_to_use.append(aten__fp8_mm) + kwarg_overrides[aten__fp8_mm.uid] = dict( + out_dtype=out_dtype, use_fast_accum=use_fast_accum ) - # We dont have triton lowerings for the MX variants yet - if scale_a.dtype != torch.float32: - return autotune_select_algorithm(name, choices, input_nodes, layout) - _, is_nonzero = _is_static_problem(layout) - if is_nonzero and use_triton_template( - layout, enable_float8=True, check_max_autotune=False + if ( + # We dont have triton lowerings for the MX variants yet + scale_a.dtype == torch.float32 + and is_nonzero + and use_triton_template(layout, enable_float8=True, check_max_autotune=False) ): overriders = dict(USE_FAST_ACCUM=use_fast_accum) + # TODO (paulzhan): There is no template that exists for bias and TMA # Don't run tma template currently if bias exists if use_triton_tma_template(mat_a, mat_b) and not bias: - # Get TMA template choices using the new unified function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [scaled_mm_device_tma_template], - name, - kwarg_overrides={scaled_mm_device_tma_template.uid: overriders}, - ) - ) + templates_to_use.append(scaled_mm_device_tma_template) + kwarg_overrides[scaled_mm_device_tma_template.uid] = overriders - # Get template choices using the new unified function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [mm_template], - name, - kwarg_overrides={mm_template.uid: overriders}, - ) + templates_to_use.append(mm_template) + kwarg_overrides[mm_template.uid] = overriders + + # Single unified call for all templates + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + templates_to_use, + name, + kwarg_overrides=kwarg_overrides, ) + ) + + # Early return for MX variants + if scale_a.dtype != torch.float32: + return autotune_select_algorithm(name, choices, input_nodes, layout) if ( is_nonzero diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index a7497b6d684af..9a58af591f803 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Union import torch @@ -19,6 +19,7 @@ if TYPE_CHECKING: from torch._inductor.ir import ChoiceCaller + from torch._inductor.select_algorithm import KernelTemplate log = logging.getLogger(__name__) @@ -155,16 +156,19 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None): assert layout1 == layout2 # options to tune from choices: list[ChoiceCaller] = [] + + # Collect all templates for unified call 
+ templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs(kernel_inputs, [aten_mm_plus_mm], "mm_plus_mm") - ) + templates_to_use.append(aten_mm_plus_mm) if use_triton_template(layout1, check_max_autotune=False): - # Get template choices using the new unified function - choices.extend( - V.choices.get_mm_configs(kernel_inputs, [mm_plus_mm_template], "mm_plus_mm") - ) + templates_to_use.append(mm_plus_mm_template) + + # Single unified call for all templates + choices.extend( + V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm_plus_mm") + ) return autotune_select_algorithm( "mm_plus_mm", choices, kernel_inputs.nodes(), layout1 From 30191fcf03ddd6a09381a490096c4bb721874316 Mon Sep 17 00:00:00 2001 From: Ruben Rodriguez Buchillon Date: Tue, 9 Sep 2025 23:00:12 -0700 Subject: [PATCH 053/693] [inductor][choices] rename get_mm_configs to get_template_configs (#162293) # why - eventually we want all templates to go through this - we're exposing this through diode as a sort of interface/API - avoid later renaming # what - rename get_mm_configs to get_template_configs - rename _finalize_mm_configs to _finalize_template_configs # testing - lintrunner - ci Differential Revision: [D81820641](https://our.internmc.facebook.com/intern/diff/D81820641) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162293 Approved by: https://github.com/eellison ghstack dependencies: #161351, #161350 --- torch/_inductor/choices.py | 10 +++++----- torch/_inductor/kernel/bmm.py | 6 ++++-- torch/_inductor/kernel/mm.py | 18 ++++++++++++------ torch/_inductor/kernel/mm_plus_mm.py | 2 +- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index fa70c6a53358e..bb7fe6d46d9d1 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -106,7 +106,7 @@ def get_flex_decode_configs( flex_heuristics = self.get_config_heuristics(device_type) return flex_heuristics.get_flex_decode_configs(head_dim, dtype) - def _finalize_mm_configs( + def _finalize_template_configs( self, template_choices: dict[str, Generator[KernelTemplateChoice, None, None]], kernel_inputs: KernelInputs, @@ -148,12 +148,12 @@ def get_ktc( """ Utility to get the KernelTemplateChoice generator for a specific input. - This is a per template/op call, whereas get_mm_configs is an op wide call (all templates). + This is a per template/op call, whereas get_template_configs is an op wide call (all templates). 
Consider when overriding/using at which level you need to make decisions """ # Extract device_type from kernel_inputs device_type = kernel_inputs.device_type - assert device_type is not None, "get_mm_configs requires a valid device type" + assert device_type is not None, "get_ktc requires a valid device type" # Extract template_name from the template object template_name = template.uid @@ -221,7 +221,7 @@ def _need_to_fix_layout( not isinstance(ktc.template, ExternKernelChoice) for ktc in adjusted_choices ) - def get_mm_configs( + def get_template_configs( self, kernel_inputs: KernelInputs, templates: list[Union[KernelTemplate, ExternKernelChoice]], @@ -258,7 +258,7 @@ def get_mm_configs( ) # Second pass: Adjust the template choices - adjusted_choices = self._finalize_mm_configs( + adjusted_choices = self._finalize_template_configs( template_choices, kernel_inputs, templates, diff --git a/torch/_inductor/kernel/bmm.py b/torch/_inductor/kernel/bmm.py index ea81799690f79..42cd742fa2928 100644 --- a/torch/_inductor/kernel/bmm.py +++ b/torch/_inductor/kernel/bmm.py @@ -214,7 +214,7 @@ def may_require_contiguous(t, meta_t): # Single unified call for all templates choices.extend( - V.choices.get_mm_configs( + V.choices.get_template_configs( kernel_inputs, templates_to_use, name, @@ -290,6 +290,8 @@ def tuned_baddbmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): templates_to_use.append(bmm_template) # Single unified call for all templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_template_configs(kernel_inputs, templates_to_use, name) + ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index c6219fa25cfc3..3f54854827c6d 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -766,7 +766,9 @@ def tuned_mm(mat1, mat2, *, layout=None): templates_to_use.append(mm_contiguous_subgraph_template) # Single unified call for all non-autoheuristic templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm")) + choices.extend( + V.choices.get_template_configs(kernel_inputs, templates_to_use, "mm") + ) if ( is_nonzero @@ -801,7 +803,7 @@ def tuned_mm(mat1, mat2, *, layout=None): always_included.append("extern_mm") num_choices_before_extra_configs = len(choices) choices.extend( - V.choices.get_mm_configs( + V.choices.get_template_configs( # TODO(coconutruben): remove once we deprecate ah # mm-extra is a hack to keep the ah functionality alive # while we transition to the unified kwargs retrieval @@ -894,7 +896,9 @@ def tuned_int_mm(mat1, mat2, *, layout=None): templates_to_use.append(mm_template) # Single unified call for all templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_template_configs(kernel_inputs, templates_to_use, name) + ) if use_cutlass and _use_cutlass_for_op(name): CUTLASS3xGemmTemplate.add_cutlass_gemm_choices( @@ -940,7 +944,7 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): [inp, mat1, mat2], scalars=dict(alpha=alpha, beta=beta) ) choices.extend( - V.choices.get_mm_configs( + V.choices.get_template_configs( kernel_inputs, [aten_addmm], name, @@ -962,7 +966,9 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): templates_to_use.append(addmm_contiguous_subgraph_template) # Single unified call for all templates - 
choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_template_configs(kernel_inputs, templates_to_use, name) + ) if ( is_nonzero @@ -1149,7 +1155,7 @@ def tuned_scaled_mm( # Single unified call for all templates choices.extend( - V.choices.get_mm_configs( + V.choices.get_template_configs( kernel_inputs, templates_to_use, name, diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index 9a58af591f803..cf169e81067da 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -167,7 +167,7 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None): # Single unified call for all templates choices.extend( - V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm_plus_mm") + V.choices.get_template_configs(kernel_inputs, templates_to_use, "mm_plus_mm") ) return autotune_select_algorithm( From a67e798cb79a9633d89234f3316e4f165b9c8d5a Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Tue, 9 Sep 2025 22:47:35 -0700 Subject: [PATCH 054/693] [dynamo][guards] Prevent framelocals to dict conversion for not required LAMBDA_GUARD (#162509) This is a smaller PR to reduce framelocals to dict conversion. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162509 Approved by: https://github.com/williamwen42 --- torch/_C/_dynamo/guards.pyi | 6 ++ torch/_dynamo/guards.py | 18 +++--- torch/csrc/dynamo/guards.cpp | 120 +++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 9 deletions(-) diff --git a/torch/_C/_dynamo/guards.pyi b/torch/_C/_dynamo/guards.pyi index aa6614504fc23..3cea2c0b83342 100644 --- a/torch/_C/_dynamo/guards.pyi +++ b/torch/_C/_dynamo/guards.pyi @@ -224,6 +224,12 @@ class GuardManager: def add_lambda_guard( self, user_lambda: Callable[..., Any], verbose_code_parts: list[str] ) -> None: ... + def add_lambda_guard_no_args( + self, user_lambda: Callable[..., Any], verbose_code_parts: list[str] + ) -> None: ... + def add_lambda_guard_no_framelocals( + self, user_lambda: Callable[..., Any], verbose_code_parts: list[str] + ) -> None: ... def add_id_match_guard( self, id_val: int, verbose_code_parts: list[str] ) -> None: ... 
diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index df683eb6e1ebb..509aa206bb889 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -2037,10 +2037,10 @@ def DUAL_LEVEL(self, guard: Guard) -> None: # TODO(anijain2305) - Consider this moving this guard to C++ forward_ad = torch.autograd.forward_ad - def fn(x: Any) -> bool: + def fn() -> bool: return forward_ad._current_level == dual_level - self.guard_manager.root.add_lambda_guard( + self.guard_manager.root.add_lambda_guard_no_args( fn, get_verbose_code_parts(code, guard) ) @@ -2056,10 +2056,10 @@ def FUNCTORCH_STACK_MATCH(self, guard: Guard) -> None: # TODO(anijain2305) - Consider this moving this guard to C++ compare_fn = torch._functorch.pyfunctorch.compare_functorch_state - def fn(x: Any) -> bool: + def fn() -> bool: return compare_fn(states) - self.guard_manager.root.add_lambda_guard( + self.guard_manager.root.add_lambda_guard_no_args( fn, get_verbose_code_parts(code, guard) ) @@ -2085,10 +2085,10 @@ def hooks_ids_fn( ] self._set_guard_export_info(guard, code) - def fn(x: Any) -> bool: + def fn() -> bool: return guard_hooks_ids == hooks_ids_fn(get_hooks()) - self.guard_manager.root.add_lambda_guard( + self.guard_manager.root.add_lambda_guard_no_args( fn, get_verbose_code_parts(code, guard) ) @@ -2109,7 +2109,7 @@ def metadata_checker(x: Any) -> bool: return x.__tensor_flatten__()[1] == original_metadata global_name = f"___check_metadata_{id(metadata_checker)}_c{CompileContext.current_compile_id()}" - self.get_guard_manager(guard).add_lambda_guard( + self.get_guard_manager(guard).add_lambda_guard_no_framelocals( metadata_checker, get_verbose_code_parts(global_name, guard) ) @@ -2184,7 +2184,7 @@ def EQUALS_MATCH(self, guard: Guard, recompile_hint: Optional[str] = None) -> No code.append(f"__math_isnan({ref})") self._set_guard_export_info(guard, code) - self.get_guard_manager(guard).add_lambda_guard( + self.get_guard_manager(guard).add_lambda_guard_no_framelocals( _get_closure_vars()["__math_isnan"], # type: ignore[arg-type] get_verbose_code_parts(code, guard), ) @@ -2197,7 +2197,7 @@ def EQUALS_MATCH(self, guard: Guard, recompile_hint: Optional[str] = None) -> No code.append(f"__numpy_isnan({ref})") self._set_guard_export_info(guard, code) - self.get_guard_manager(guard).add_lambda_guard( + self.get_guard_manager(guard).add_lambda_guard_no_framelocals( _get_closure_vars()["__numpy_isnan"], # type: ignore[arg-type] get_verbose_code_parts(code, guard), ) diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index c8e0ae9c27360..5510ef635c0a7 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -1701,6 +1701,93 @@ class LAMBDA_GUARD : public LeafGuard { py::function _guard_check_fn; }; +/* +Similar to LAMBDA_GUARD but where lambda does not take any arguments. This +ensures that we don't need to construct a dictionary from framelocals even if +the guard is at the root. These guards are for root guards like GlobalState. 
+*/ +class LAMBDA_GUARD_NO_ARGS : public LeafGuard { + public: + LAMBDA_GUARD_NO_ARGS( + RootGuardManager* root_guard_manager, + py::object guard_check_fn, + py::object verbose_code_parts) + : LeafGuard(root_guard_manager, std::move(verbose_code_parts)) { + if (py::isinstance(guard_check_fn)) { + _guard_check_fn = py::cast(std::move(guard_check_fn)); + } else { + throw py::type_error("LAMBDA_GUARD_NO_ARGS expects (callable, str)"); + } + } + + bool _check() { + PyObject* x = PyObject_CallNoArgs(_guard_check_fn.ptr()); // new ref + if (x == nullptr) { + // An exception is caught in the lambda function. + PyErr_Clear(); + return false; + } + bool result = PyObject_IsTrue(x); + Py_DECREF(x); + return result; + } + + bool check_nopybind(PyObject* value) override { // borrowed ref + return _check(); + } + + GuardDebugInfo check_verbose_nopybind(PyObject* value) override { + PyObject* x = PyObject_CallNoArgs(_guard_check_fn.ptr()); // new ref + if (x == nullptr) { + // An exception is caught in the lambda function. + std::string exc_message = get_exception_message(); + PyErr_Clear(); + return GuardDebugInfo(false, exc_message, 0); + } + bool result = PyObject_IsTrue(x); + Py_DECREF(x); + if (result) { + return GuardDebugInfo(true, 0); + } + return GuardDebugInfo(false, verbose_code_parts(), 0); + } + + // Ensure that framelocals dict is not constructed. + bool check_nopybind(FrameLocalsMapping* map) override { + return _check(); + } + + private: + // The user provided lambda function for check_fn. + py::function _guard_check_fn; +}; + +/* +Similar to LAMBDA_GUARD but disallows running on a FrameLocalsMapping input. +These guards are at trunk or leaf, and not at the root. +*/ +class LAMBDA_GUARD_NO_FRAMELOCALS : public LAMBDA_GUARD { + public: + LAMBDA_GUARD_NO_FRAMELOCALS( + RootGuardManager* root_guard_manager, + py::object guard_check_fn, + py::object verbose_code_parts) + : LAMBDA_GUARD(root_guard_manager, guard_check_fn, verbose_code_parts) {} + + bool check_nopybind(PyObject* value) override { // borrowed ref + return LAMBDA_GUARD::check_nopybind(value); + } + + GuardDebugInfo check_verbose_nopybind(PyObject* value) override { + return LAMBDA_GUARD::check_verbose_nopybind(value); + } + + bool check_nopybind(FrameLocalsMapping* map) override { + throw std::runtime_error( + "FramelocalsMapping input to LAMBDA_GUARD_NO_FRAMELOCALS, use LAMBDA_GUARD instead"); + } +}; + class TYPE_MATCH : public LeafGuard { public: // type_id = id(type(obj)) @@ -6605,6 +6692,19 @@ PyObject* torch_c_dynamo_guards_init() { py_m, "LAMBDA_GUARD") .def(py::init()) .def("__call__", &LAMBDA_GUARD::check); + py::class_< + LAMBDA_GUARD_NO_ARGS, + LeafGuard, + std::shared_ptr>(py_m, "LAMBDA_GUARD_NO_ARGS") + .def(py::init()) + .def("__call__", &LAMBDA_GUARD_NO_ARGS::check); + py::class_< + LAMBDA_GUARD_NO_FRAMELOCALS, + LeafGuard, + std::shared_ptr>( + py_m, "LAMBDA_GUARD_NO_FRAMELOCALS") + .def(py::init()) + .def("__call__", &LAMBDA_GUARD_NO_FRAMELOCALS::check); py::class_>( py_m, "TYPE_MATCH") .def(py::init()) @@ -6907,6 +7007,26 @@ PyObject* torch_c_dynamo_guards_init() { std::move(lambda), std::move(verbose_code_parts))); }) + .def( + "add_lambda_guard_no_args", + [](GuardManager& self, + py::object lambda, + py::object verbose_code_parts) -> void { + self.add_leaf_guard(std::make_shared( + self.get_root(), + std::move(lambda), + std::move(verbose_code_parts))); + }) + .def( + "add_lambda_guard_no_framelocals", + [](GuardManager& self, + py::object lambda, + py::object verbose_code_parts) -> void { + 
self.add_leaf_guard(std::make_shared( + self.get_root(), + std::move(lambda), + std::move(verbose_code_parts))); + }) .def( "add_type_match_guard", [](GuardManager& self, From 5f630d28d7ff9fdd8bd6cdbe2438e5c821007845 Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Tue, 9 Sep 2025 22:48:56 -0700 Subject: [PATCH 055/693] [dynamo][guards] Do not construct entire framelocals dict for LAMBDA_GUARD (#162525) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162525 Approved by: https://github.com/williamwen42 ghstack dependencies: #162509 --- test/dynamo/test_guard_manager.py | 24 ++++++++++++- test/dynamo/test_misc.py | 23 +++++++++---- torch/_C/_dynamo/guards.pyi | 8 ++++- torch/_dynamo/config.py | 6 ++++ torch/_dynamo/guards.py | 28 ++++++++++++++-- torch/_dynamo/output_graph.py | 2 +- torch/csrc/dynamo/guards.cpp | 56 +++++++++++++++++++++++++++---- 7 files changed, 128 insertions(+), 19 deletions(-) diff --git a/test/dynamo/test_guard_manager.py b/test/dynamo/test_guard_manager.py index c4ad29f69b438..32f666698bd04 100644 --- a/test/dynamo/test_guard_manager.py +++ b/test/dynamo/test_guard_manager.py @@ -116,6 +116,8 @@ def test_python_lambda_leaf_guard(self): const_guard = guards.LAMBDA_GUARD( root, functools.partial(equals_match, expected=5), + {}, + False, equals_match_verbose_code_parts(5), ) self.assertTrue(const_guard(5)) @@ -405,10 +407,14 @@ def test_guard_manager_leaf_guard(self): guard_manager.add_type_match_guard(id_type(5), ["type(x) == int"]) guard_manager.add_lambda_guard( functools.partial(ge_match, expected=5), + {}, + False, ge_match_verbose_code_parts(expected=5), ) guard_manager.add_lambda_guard( functools.partial(less_match, expected=10), + {}, + False, less_match_verbose_code_parts(expected=10), ) self.assertEqual(len(guard_manager.get_leaf_guards()), 3) @@ -428,10 +434,14 @@ def __init__(self, x, y): guard_manager.add_type_match_guard(id_type(foo), ["type(x) == Foo"]) guard_manager.getattr_manager("x", "x", 1, default_mgr_enum).add_lambda_guard( functools.partial(equals_match, expected=foo.x), + {}, + False, equals_match_verbose_code_parts(foo.x), ) guard_manager.getattr_manager("y", "y", 2, default_mgr_enum).add_lambda_guard( functools.partial(equals_match, expected=foo.y), + {}, + False, equals_match_verbose_code_parts(foo.y), ) self.assertEqual(len(guard_manager.get_leaf_guards()), 1) @@ -474,10 +484,14 @@ def test_item_guard_manager(self): guard_manager.add_type_match_guard(id_type(foo), ["type(x) == Foo"]) guard_manager.getitem_manager(0, "", 1, default_mgr_enum).add_lambda_guard( functools.partial(equals_match, expected=foo[0]), + {}, + False, equals_match_verbose_code_parts(foo[0]), ) guard_manager.getitem_manager(1, "", 2, default_mgr_enum).add_lambda_guard( functools.partial(equals_match, expected=foo[1]), + {}, + False, equals_match_verbose_code_parts(foo[1]), ) self.assertEqual(len(guard_manager.get_leaf_guards()), 1) @@ -585,6 +599,8 @@ def test_globals(self): lambda x: isinstance(x, Pair) and isinstance(x.x, torch.Tensor) and isinstance(x.y, int), + {}, + False, "global guard fail", ) @@ -635,6 +651,8 @@ def mul(self, x): ) attr_manager.add_lambda_guard( lambda x: x == 4, + {}, + False, "Expected value 4", ) @@ -675,6 +693,8 @@ def test_global_weakref(self): weakref_manager.add_lambda_guard( lambda x: isinstance(x, torch.Tensor), + {}, + False, "global weakref fail", ) @@ -694,6 +714,8 @@ def test_lambda_manager(self): ) foo_mgr.add_lambda_guard( lambda x: x == 3, + {}, + False, "Expected value 3", ) self.assertTrue(guard_manager.check(a)) @@ 
-779,7 +801,7 @@ def nothing(): # Add key-value manager (nothing : {"z" : 3}) self.assertTrue(root.check(f_locals)) dict_mgr.get_key_manager(1, "", nothing, default_mgr_enum).add_lambda_guard( - lambda x: x is nothing, ["x is nothing"] + lambda x: x is nothing, {}, False, ["x is nothing"] ) self.assertTrue(root.check(f_locals)) value_mgr = dict_mgr.get_value_manager( diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py index c9a2a0730b08a..0a85aac2bf318 100644 --- a/test/dynamo/test_misc.py +++ b/test/dynamo/test_misc.py @@ -7207,7 +7207,9 @@ def fn(x): return x + 1 guard_manager = torch._dynamo.guards.RootGuardManager() - guard_manager.add_lambda_guard(lambda L: isinstance(L["x"], int), []) + guard_manager.add_lambda_guard( + lambda L: isinstance(L["x"], int), {"x": 0}, True, [] + ) def injected(x): return x + 42 @@ -7232,27 +7234,33 @@ def fn(x): return x + 1 guard_manager_bool = torch._dynamo.guards.RootGuardManager() - guard_manager_bool.add_lambda_guard(lambda L: isinstance(L["x"], bool), []) + guard_manager_bool.add_lambda_guard( + lambda L: isinstance(L["x"], bool), {"x": 0}, True, [] + ) def injected_bool(x: bool): return x + 102 guard_manager_int = torch._dynamo.guards.RootGuardManager() - guard_manager_int.add_lambda_guard(lambda L: isinstance(L["x"], int), []) + guard_manager_int.add_lambda_guard( + lambda L: isinstance(L["x"], int), {"x": 0}, True, [] + ) def injected_int(x: int): return x + 42 guard_manager_tensor = torch._dynamo.guards.RootGuardManager() guard_manager_tensor.add_lambda_guard( - lambda L: isinstance(L["x"], torch.Tensor), [] + lambda L: isinstance(L["x"], torch.Tensor), {"x": 0}, True, [] ) def injected_tensor(x: torch.Tensor): return x + 100 guard_manager_str = torch._dynamo.guards.RootGuardManager() - guard_manager_str.add_lambda_guard(lambda L: isinstance(L["x"], str), []) + guard_manager_str.add_lambda_guard( + lambda L: isinstance(L["x"], str), {"x": 0}, True, [] + ) def injected_str(x: str): return x + "1" @@ -7329,7 +7337,10 @@ def fn(x): guard_manager_bool = torch._dynamo.guards.RootGuardManager() guard_manager_bool.add_lambda_guard( - lambda L: isinstance(L["x"], bool), ["isinstance(L['x'], bool)"] + lambda L: isinstance(L["x"], bool), + {"x": 0}, + True, + ["isinstance(L['x'], bool)"], ) def injected_bool(x: bool): diff --git a/torch/_C/_dynamo/guards.pyi b/torch/_C/_dynamo/guards.pyi index 3cea2c0b83342..537a28123d0d3 100644 --- a/torch/_C/_dynamo/guards.pyi +++ b/torch/_C/_dynamo/guards.pyi @@ -222,7 +222,11 @@ class GuardManager: ) -> GuardManager: ... # Leaf guards def add_lambda_guard( - self, user_lambda: Callable[..., Any], verbose_code_parts: list[str] + self, + user_lambda: Callable[..., Any], + required_locals: dict[str, int], + construct_partial_framelocals_dict: bool, + verbose_code_parts: list[str], ) -> None: ... def add_lambda_guard_no_args( self, user_lambda: Callable[..., Any], verbose_code_parts: list[str] @@ -342,6 +346,8 @@ class RootGuardManager(GuardManager): def add_epilogue_lambda_guard( self, guard: LeafGuard, + required_locals: dict[str, int], + construct_partial_framelocals_dict: bool, verbose_code_parts: list[str], ) -> None: ... def clone_manager( diff --git a/torch/_dynamo/config.py b/torch/_dynamo/config.py index b8d1008dec8e1..8649f17cd17a2 100644 --- a/torch/_dynamo/config.py +++ b/torch/_dynamo/config.py @@ -381,6 +381,12 @@ # useful for regional compilation. max_saved_pointers_for_recursive_dict_tags_check = 256 +# Controls whether to construct the partial framelocals to dict for lambda +# guards. 
This is a temporary flag to allow quick fallback behavior in case of +# unexpected issues. Default is True, i.e., we will construct only partial +# dict, a faster version for guards. Set to False to fallback to old behavior. +construct_partial_framelocals_dict = True + # If True, raises exception if TorchDynamo is called with a context manager raise_on_ctx_manager_usage = True diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index 509aa206bb889..05e1311cf7ebd 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -234,7 +234,7 @@ ) -def get_framelocals_idx(code: types.CodeType, var_name: str) -> int: +def get_framelocals_idx(code: types.CodeType, var_name: str) -> Optional[int]: # Refer to index in the frame's localsplus directly. # NOTE: name order for a code object doesn't change. # NOTE: we need to find the LAST matching index because <= 3.10 contains @@ -242,6 +242,8 @@ def get_framelocals_idx(code: types.CodeType, var_name: str) -> int: # and will take up 2 slots of the frame's localsplus. The correct behavior # is to refer to the cell, which has a higher index. framelocals_names_reversed = code_framelocals_names_reversed_cached(code) + if var_name not in framelocals_names_reversed: + return None framelocals_idx = ( len(framelocals_names_reversed) - framelocals_names_reversed.index(var_name) - 1 ) @@ -1357,6 +1359,7 @@ def get_guard_manager_from_source(self, source: Source) -> GuardManager: # Use istype instead of isinstance to check for exact type of source. if istype(source, LocalSource): framelocals_idx = get_framelocals_idx(self.f_code, source.local_name) + assert framelocals_idx is not None out = root_guard_manager.framelocals_manager( key=(source.local_name, framelocals_idx), source=source_name, @@ -1744,15 +1747,34 @@ def add_python_lambda_leaf_guard_to_root( guards_log.debug("Python shape guard function:\n%s", pycode) exec(pycode, globals_for_guard_fn, out) guard_fn = out["___make_guard_fn"](*closure_vars.values()) + + required_locals = {} + all_locals = self.scope["L"].keys() + for var_name in guard_fn.__code__.co_consts: + if isinstance(var_name, str) and var_name in all_locals: + index = get_framelocals_idx(self.f_code, var_name) + if index is not None: + required_locals[var_name] = index + + construct_partial_framelocals_dict = config.construct_partial_framelocals_dict + if is_epilogue: # Epilogue guards are run after all the other guards have finished. # If epilogue guards contain a getattr or getitem access, one of the # other guards would fail preventing the epilogue guards to run. self.guard_manager.root.add_epilogue_lambda_guard( - guard_fn, verbose_code_parts + guard_fn, + required_locals, + construct_partial_framelocals_dict, + verbose_code_parts, ) else: - self.guard_manager.root.add_lambda_guard(guard_fn, verbose_code_parts) + self.guard_manager.root.add_lambda_guard( + guard_fn, + required_locals, + construct_partial_framelocals_dict, + verbose_code_parts, + ) # Warning: use this with care! This lets you access what the current # value of the value you are guarding on is. 
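The co_consts scan above works because the generated guard lambdas index the framelocals dict with string literals, and those literals are stored as constants of the code object. A tiny standalone demonstration (not the real generated guard code):

    guard_fn = lambda L: isinstance(L["x"], int) and L["y"] > 0
    # the strings 'x' and 'y' show up in co_consts, which is what the
    # required_locals mapping is derived from
    print(guard_fn.__code__.co_consts)
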
You probably don't want diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index 4cdf353da99ed..ee3c34618413e 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -2053,7 +2053,7 @@ def compile_and_call_fx_graph( check_fn_source = inspect.getsource(specialization.check_fn).strip() # Required because the LABDA_GUARD API requires a root guard manager unused_root_guard_manager = RootGuardManager() - check_fn = guards.LAMBDA_GUARD( # type: ignore[attr-defined] + check_fn = guards.LAMBDA_GUARD_NO_FRAMELOCALS( # type: ignore[attr-defined] unused_root_guard_manager, specialization.check_fn, [check_fn_source], diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index 5510ef635c0a7..44e95d6910028 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -1625,9 +1625,7 @@ class LeafGuard { // is not exposed to Python and can only be called from C++. virtual bool check_nopybind(PyObject* value) = 0; virtual bool check_nopybind(FrameLocalsMapping* map) { - // throw std::runtime_error("fallback to python"); - // Could fallback to running check on the Python dict (lazily constructed) - return check_nopybind((PyObject*)map->to_dict()); + throw std::runtime_error("fallback to python"); } virtual ~LeafGuard() = default; @@ -1658,8 +1656,13 @@ class LAMBDA_GUARD : public LeafGuard { LAMBDA_GUARD( RootGuardManager* root_guard_manager, py::object guard_check_fn, + py::object required_locals, + bool construct_partial_framelocals_dict, py::object verbose_code_parts) - : LeafGuard(root_guard_manager, std::move(verbose_code_parts)) { + : LeafGuard(root_guard_manager, std::move(verbose_code_parts)), + _required_locals(py::cast(required_locals)), + _construct_partial_framelocals_dict( + construct_partial_framelocals_dict) { if (py::isinstance(guard_check_fn)) { _guard_check_fn = py::cast(std::move(guard_check_fn)); } else { @@ -1696,7 +1699,30 @@ class LAMBDA_GUARD : public LeafGuard { return GuardDebugInfo(false, verbose_code_parts(), 0); } + bool check_nopybind(FrameLocalsMapping* map) override { + // TODO (anijain2305) - Get rid of the _construct_partial_framelocals_dict + // once its stable. + if (_construct_partial_framelocals_dict) { + py::dict partial_dict; + + for (auto item : _required_locals) { + partial_dict[item.first] = map->get(item.second.cast()); + } + + return check_nopybind(partial_dict.ptr()); + } + return check_nopybind((PyObject*)map->to_dict()); + } + private: + // Dict of (local_name, framelocal_idx) representing the minimum number of + // framelocals needed to construct the dictionary for the lambda guard. + py::dict _required_locals; + + // Temporary flag to allow a fallback behavior. With stability, we can remove + // this member. + bool _construct_partial_framelocals_dict; + // The user provided lambda function for check_fn. 
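In plain Python terms, the partial-dict path added above behaves roughly like the sketch below; `framelocals` stands in for the C++ FrameLocalsMapping and the helper name is invented:

    def check_with_partial_dict(guard_fn, required_locals, framelocals):
        # required_locals: {local_name: index into the frame's localsplus}
        partial_L = {name: framelocals[idx] for name, idx in required_locals.items()}
        return guard_fn(partial_L)

    framelocals = [5, "unrelated", object()]  # stand-in for the frame's local slots
    assert check_with_partial_dict(
        lambda L: isinstance(L["x"], int), {"x": 0}, framelocals
    )
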
py::function _guard_check_fn; }; @@ -1772,7 +1798,12 @@ class LAMBDA_GUARD_NO_FRAMELOCALS : public LAMBDA_GUARD { RootGuardManager* root_guard_manager, py::object guard_check_fn, py::object verbose_code_parts) - : LAMBDA_GUARD(root_guard_manager, guard_check_fn, verbose_code_parts) {} + : LAMBDA_GUARD( + root_guard_manager, + guard_check_fn, + py::dict(), + false, + verbose_code_parts) {} bool check_nopybind(PyObject* value) override { // borrowed ref return LAMBDA_GUARD::check_nopybind(value); @@ -6690,7 +6721,8 @@ PyObject* torch_c_dynamo_guards_init() { .def("verbose_code_parts", &LeafGuard::verbose_code_parts); py::class_>( py_m, "LAMBDA_GUARD") - .def(py::init()) + .def( + py::init()) .def("__call__", &LAMBDA_GUARD::check); py::class_< LAMBDA_GUARD_NO_ARGS, @@ -7001,10 +7033,14 @@ PyObject* torch_c_dynamo_guards_init() { "add_lambda_guard", [](GuardManager& self, py::object lambda, + py::object required_locals, + bool construct_partial_framelocals_dict, py::object verbose_code_parts) -> void { self.add_leaf_guard(std::make_shared( self.get_root(), std::move(lambda), + std::move(required_locals), + construct_partial_framelocals_dict, std::move(verbose_code_parts))); }) .def( @@ -7667,9 +7703,15 @@ PyObject* torch_c_dynamo_guards_init() { "add_epilogue_lambda_guard", [](RootGuardManager& self, py::object lambda, + py::object required_locals, + bool construct_partial_framelocals_dict, py::object verbose_code_parts) -> void { self.add_epilogue_lambda_guard(std::make_unique( - &self, std::move(lambda), std::move(verbose_code_parts))); + &self, + std::move(lambda), + std::move(required_locals), + construct_partial_framelocals_dict, + std::move(verbose_code_parts))); }); // Dict Guard Manager From ab0694f1c6974ef82a05d9d2f964bffc8b3d47e8 Mon Sep 17 00:00:00 2001 From: Max Podkorytov <4273004+tenpercent@users.noreply.github.com> Date: Wed, 10 Sep 2025 19:33:40 +0000 Subject: [PATCH 056/693] [ROCm][Inductor][CK backend] Install rocm-composable-kernel python package on ROCm Linux CI docker images (#162288) Reopened from #158747 which got reverted since without setuptools-scm in pytorch index URL the wheel cannot be built We reconsider the original PR idea of introducing CK as a pytorch dependency on ROCm Linux and install the CK python package in CI only -- since (1) rocm-composable-kernel depends on setuptools-scm which depends on tomli and the existing index URLs need to be modified to host the new packages and (2) there also is a packaging [bug](https://github.com/pypa/setuptools/issues/3269#issuecomment-1254507377) in Ubuntu 22.04 which prevents correct dynamic version calculation with default system pip. 
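One user-visible effect, spelled out under "Extras" just below: the CK path no longer has to come from TORCHINDUCTOR_CK_DIR and can instead be inferred by importing the ck4inductor package. A simplified sketch of that lookup (the real helper, try_import_ck_lib, returns more than just the path):

    import os

    def find_ck_dir():
        try:
            import ck4inductor  # shipped by the rocm-composable-kernel package
        except ImportError:
            return None
        return os.path.dirname(ck4inductor.__file__)

    ck_dir = find_ck_dir()  # None when CK is not installed; the tests skip in that case
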
Extras: -> this PR reconsiders how TORCHINDUCTOR_CK_DIR env variable is used; previously, this var was used to point to rocm-composable-kernel package installation path on the filesystem; now, the path is inferred by trying to import ck4inductor -> the tests are updated to reflect this change -> since in CI clang points to a bash script which invokes sccache, we cannot patch PATH to not contain sccache, this logic is removed from the testing code -> scaled_mm test crashes during the benchmarking when the benchmarking happens in the main process, and times out benchmarking when it happens in a subprocess, on gfx942, so it is disabled TBD: roll back rocm-mi300 workflow before merging Pull Request resolved: https://github.com/pytorch/pytorch/pull/162288 Approved by: https://github.com/jeffdaily --- .ci/docker/centos-rocm/Dockerfile | 6 +++- .../ci_commit_pins/rocm-composable-kernel.txt | 1 + .ci/docker/common/install_rocm.sh | 9 ++++++ .ci/docker/ubuntu-rocm/Dockerfile | 6 +++- .github/workflows/rocm-mi300.yml | 1 + test/inductor/test_ck_backend.py | 30 ++++++------------- .../_inductor/codegen/rocm/compile_command.py | 11 +++++-- torch/_inductor/utils.py | 11 +------ 8 files changed, 39 insertions(+), 36 deletions(-) create mode 100644 .ci/docker/ci_commit_pins/rocm-composable-kernel.txt diff --git a/.ci/docker/centos-rocm/Dockerfile b/.ci/docker/centos-rocm/Dockerfile index 8d1e7f5972b1d..319765590fc02 100644 --- a/.ci/docker/centos-rocm/Dockerfile +++ b/.ci/docker/centos-rocm/Dockerfile @@ -56,9 +56,13 @@ ENV INSTALLED_VISION ${VISION} # Install rocm ARG ROCM_VERSION +RUN mkdir ci_commit_pins +COPY ./common/common_utils.sh common_utils.sh +COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt COPY ./common/install_rocm.sh install_rocm.sh RUN bash ./install_rocm.sh -RUN rm install_rocm.sh +RUN rm install_rocm.sh common_utils.sh +RUN rm -r ci_commit_pins COPY ./common/install_rocm_magma.sh install_rocm_magma.sh RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} RUN rm install_rocm_magma.sh diff --git a/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt b/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt new file mode 100644 index 0000000000000..c45f46af95d03 --- /dev/null +++ b/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt @@ -0,0 +1 @@ +7fe50dc3da2069d6645d9deb8c017a876472a977 diff --git a/.ci/docker/common/install_rocm.sh b/.ci/docker/common/install_rocm.sh index 5d355276def7c..a156670cb815b 100644 --- a/.ci/docker/common/install_rocm.sh +++ b/.ci/docker/common/install_rocm.sh @@ -2,6 +2,11 @@ set -ex +# for pip_install function +source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" + +ROCM_COMPOSABLE_KERNEL_VERSION="$(cat $(dirname $0)/../ci_commit_pins/rocm-composable-kernel.txt)" + ver() { printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' 
' '); } @@ -113,6 +118,8 @@ EOF rm -rf HIP clr fi + pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION" + # Cleanup apt-get autoclean && apt-get clean rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* @@ -176,6 +183,8 @@ install_centos() { sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;" done + pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION" + # Cleanup yum clean all rm -rf /var/cache/yum diff --git a/.ci/docker/ubuntu-rocm/Dockerfile b/.ci/docker/ubuntu-rocm/Dockerfile index 681f6fe750510..b517a990a057b 100644 --- a/.ci/docker/ubuntu-rocm/Dockerfile +++ b/.ci/docker/ubuntu-rocm/Dockerfile @@ -52,9 +52,13 @@ ENV INSTALLED_VISION ${VISION} # Install rocm ARG ROCM_VERSION +RUN mkdir ci_commit_pins +COPY ./common/common_utils.sh common_utils.sh +COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt COPY ./common/install_rocm.sh install_rocm.sh RUN bash ./install_rocm.sh -RUN rm install_rocm.sh +RUN rm install_rocm.sh common_utils.sh +RUN rm -r ci_commit_pins COPY ./common/install_rocm_magma.sh install_rocm_magma.sh RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} RUN rm install_rocm_magma.sh diff --git a/.github/workflows/rocm-mi300.yml b/.github/workflows/rocm-mi300.yml index 7e3ba43bf9845..8ffd58cb9811c 100644 --- a/.github/workflows/rocm-mi300.yml +++ b/.github/workflows/rocm-mi300.yml @@ -70,4 +70,5 @@ jobs: build-environment: linux-noble-rocm-py3.12-mi300 docker-image: ${{ needs.linux-noble-rocm-py3_12-build.outputs.docker-image }} test-matrix: ${{ needs.linux-noble-rocm-py3_12-build.outputs.test-matrix }} + tests-to-include: "inductor/test_ck_backend" secrets: inherit diff --git a/test/inductor/test_ck_backend.py b/test/inductor/test_ck_backend.py index f73a47e45a57a..079be79fcc9d8 100644 --- a/test/inductor/test_ck_backend.py +++ b/test/inductor/test_ck_backend.py @@ -1,5 +1,4 @@ # Owner(s): ["module: inductor"] -import functools import logging import os import unittest @@ -13,6 +12,7 @@ import torch from torch._inductor import config from torch._inductor.test_case import run_tests, TestCase +from torch._inductor.utils import try_import_ck_lib from torch.testing._internal.common_cuda import tf32_off from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, @@ -32,20 +32,8 @@ log = logging.getLogger(__name__) -@functools.lru_cache(None) -def _get_path_without_sccache() -> str: - """ - Get the PATH environment variable without sccache. 
- """ - path_envs = os.environ.get("PATH", "").split(":") - path_envs = [env for env in path_envs if "/opt/cache/bin" not in env] - return ":".join(path_envs) - - -_test_env = { - "PATH": _get_path_without_sccache(), - "DISABLE_SCCACHE": "1", -} +# patch env for tests if needed +_test_env = {} @instantiate_parametrized_tests @@ -61,13 +49,10 @@ def setUp(self): ) torch.random.manual_seed(1234) - try: - import ck4inductor # @manual - self.ck_dir = os.path.dirname(ck4inductor.__file__) - os.environ["TORCHINDUCTOR_CK_DIR"] = self.ck_dir - except ImportError as e: - raise unittest.SkipTest("Composable Kernel library not installed") from e + self.ck_dir, _, _, _ = try_import_ck_lib() + if not self.ck_dir: + raise unittest.SkipTest("Composable Kernel library is not installed") try: os.environ["INDUCTOR_TEST_DISABLE_FRESH_CACHE"] = "1" @@ -288,6 +273,9 @@ def addmm(x, a, b, alpha, beta): torch.testing.assert_close(Y_compiled, Y_eager) + @unittest.skip( + "FIXME(tenpercent): kernel compilation errors on gfx942 as of 09/01/25" + ) @unittest.skipIf(not torch.version.hip, "ROCM only") @unittest.mock.patch.dict(os.environ, _test_env) @parametrize("max_autotune_gemm_backends", ("CK", "ATen,Triton,CK")) diff --git a/torch/_inductor/codegen/rocm/compile_command.py b/torch/_inductor/codegen/rocm/compile_command.py index b9cae55102b61..aa935b14af23c 100644 --- a/torch/_inductor/codegen/rocm/compile_command.py +++ b/torch/_inductor/codegen/rocm/compile_command.py @@ -4,7 +4,7 @@ from typing import Optional from torch._inductor import config -from torch._inductor.utils import is_linux +from torch._inductor.utils import is_linux, try_import_ck_lib log = logging.getLogger(__name__) @@ -18,18 +18,23 @@ def _rocm_include_paths(dst_file_ext: str) -> list[str]: if config.rocm.rocm_home else cpp_extension._join_rocm_home("include") ) - if not config.rocm.ck_dir: - log.warning("Unspecified Composable Kernel include dir") if config.is_fbcode(): from libfb.py import parutil ck_path = parutil.get_dir_path("composable-kernel-headers") else: + if not config.rocm.ck_dir: + ck_dir, _, _, _ = try_import_ck_lib() + if not ck_dir: + log.warning("Unspecified Composable Kernel directory") + config.rocm.ck_dir = ck_dir ck_path = config.rocm.ck_dir or cpp_extension._join_rocm_home( "composable_kernel" ) + log.debug("Using ck path %s", ck_path) + ck_include = os.path.join(ck_path, "include") ck_library_include = os.path.join(ck_path, "library", "include") diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index abb850ea4cce4..0b09f9a67a96d 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -1985,16 +1985,7 @@ def use_ck_template(layout: Layout) -> bool: log.warning("Please pip install Composable Kernel package") return False - if config.is_fbcode(): - config.rocm.ck_dir = ck_package_dirname - - if not config.rocm.ck_dir: - log.warning("Please set TORCHINDUCTOR_CK_DIR env variable") - return False - - if ck_package_dirname != config.rocm.ck_dir: - log.warning("Invalid path to CK library") - return False + config.rocm.ck_dir = ck_package_dirname return True From bf7f481144ca64af6cb5117c6685e67ffad29cfa Mon Sep 17 00:00:00 2001 From: Parshant Sharma Date: Wed, 10 Sep 2025 19:57:07 +0000 Subject: [PATCH 057/693] Update misleading torch.sparse_coo_tensor error check (#161900) Fixes #160622 ### Summary Updated the misleading torch.sparse_coo_tensor error check to provide clear context. 
earlier: `RuntimeError: number of dimensions must be sparse_dim (3) + dense_dim (0), but got 1` Updated: `RuntimeError: 'len(size) == sparse_dim + dense_dim' is not satisfied: len(size) = 1, sparse_dim = 3, dense_dim = 0` **Impacts:** - Comprehensive error message that will improve developer experience. - module: sparse Pull Request resolved: https://github.com/pytorch/pytorch/pull/161900 Approved by: https://github.com/nikitaved, https://github.com/pearu --- aten/src/ATen/SparseTensorImpl.h | 20 ++++++++++---------- aten/src/ATen/native/sparse/SparseTensor.cpp | 14 +++++++------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/aten/src/ATen/SparseTensorImpl.h b/aten/src/ATen/SparseTensorImpl.h index 39f77664de864..b10795fbc37eb 100644 --- a/aten/src/ATen/SparseTensorImpl.h +++ b/aten/src/ATen/SparseTensorImpl.h @@ -133,12 +133,12 @@ struct TORCH_API SparseTensorImpl : public TensorImpl { "resize_ called on tensor with symbolic shape") TORCH_CHECK( sparse_dim + dense_dim == static_cast(size.size()), - "number of dimensions must be sparse_dim (", + "'len(size) == sparse_dim + dense_dim' is not satisfied: len(size) = ", + size.size(), + ", sparse_dim = ", sparse_dim, - ") + dense_dim (", - dense_dim, - "), but got ", - size.size()); + ", dense_dim = ", + dense_dim); if (nnz() > 0) { [[maybe_unused]] auto constexpr alt_options_msg = "You could try the following options:\n\ @@ -254,12 +254,12 @@ struct TORCH_API SparseTensorImpl : public TensorImpl { "resize_and_clear_ called on tensor with symbolic shape") TORCH_CHECK( sparse_dim + dense_dim == static_cast(size.size()), - "number of dimensions must be sparse_dim (", + "'len(size) == sparse_dim + dense_dim' is not satisfied: len(size) = ", + size.size(), + ", sparse_dim = ", sparse_dim, - ") + dense_dim (", - dense_dim, - "), but got ", - size.size()); + ", dense_dim = ", + dense_dim); set_sizes_and_strides(size, std::vector(size.size())); sparse_dim_ = sparse_dim; diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp index 752365d545dee..e8c5fd013ba86 100644 --- a/aten/src/ATen/native/sparse/SparseTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseTensor.cpp @@ -391,13 +391,13 @@ void _validate_sparse_coo_tensor_args( int64_t sparse_dim = indices.size(0); int64_t dense_dim = values.dim() - 1; TORCH_CHECK( - static_cast(size.size()) == sparse_dim + dense_dim, - "number of dimensions must be sparse_dim (", - sparse_dim, - ") + dense_dim (", - dense_dim, - "), but got ", - size.size()); + sparse_dim + dense_dim == static_cast(size.size()), + "'len(size) == sparse_dim + dense_dim' is not satisfied: len(size) = ", + size.size(), + ", sparse_dim = ", + sparse_dim, + ", dense_dim = ", + dense_dim); if (check_pinning) { TORCH_CHECK( From 80d4da893cf08d7ee6e6cfe0024d1d5202a0c5a6 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 10 Sep 2025 20:11:37 +0000 Subject: [PATCH 058/693] Revert "Put torchao (0.13.0) back to benchmark workflow (#162227)" This reverts commit 00985970e312c3c5e674e8e14d39fe77c226600e. 
Reverted https://github.com/pytorch/pytorch/pull/162227 on behalf of https://github.com/huydhn due to Crashing some inductor jobs in trunk ([comment](https://github.com/pytorch/pytorch/pull/162227#issuecomment-3276355034)) --- .ci/docker/common/install_inductor_benchmark_deps.sh | 2 +- .ci/pytorch/macos-test.sh | 3 +++ .github/ci_commit_pins/torchao.txt | 2 +- .github/workflows/inductor-perf-test-nightly-h100.yml | 3 +++ .github/workflows/inductor-periodic.yml | 4 ---- .github/workflows/inductor.yml | 2 -- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.ci/docker/common/install_inductor_benchmark_deps.sh b/.ci/docker/common/install_inductor_benchmark_deps.sh index 19159b50ce37d..81467d87f5140 100644 --- a/.ci/docker/common/install_inductor_benchmark_deps.sh +++ b/.ci/docker/common/install_inductor_benchmark_deps.sh @@ -43,4 +43,4 @@ install_huggingface install_timm # Clean up -conda_run pip uninstall -y torch torchvision torchaudio triton +conda_run pip uninstall -y torch torchvision torchaudio triton torchao diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index 3437802da4eaa..a859901191e03 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -177,6 +177,9 @@ checkout_install_torchbench() { popd pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt + # https://github.com/pytorch/pytorch/issues/160689 to remove torchao because + # its current version 0.12.0 doesn't work with transformers 4.54.0 + pip uninstall -y torchao echo "Print all dependencies after TorchBench is installed" python -mpip freeze diff --git a/.github/ci_commit_pins/torchao.txt b/.github/ci_commit_pins/torchao.txt index c40e3a82e615f..d12c20e6a117f 100644 --- a/.github/ci_commit_pins/torchao.txt +++ b/.github/ci_commit_pins/torchao.txt @@ -1 +1 @@ -f32431e593d0e9db86c502d3872dd67ee40a005f +51c87b6ead6b7e098ada95d6a7609ee873b854cf diff --git a/.github/workflows/inductor-perf-test-nightly-h100.yml b/.github/workflows/inductor-perf-test-nightly-h100.yml index 7e363df9f8a86..41210f89c9a89 100644 --- a/.github/workflows/inductor-perf-test-nightly-h100.yml +++ b/.github/workflows/inductor-perf-test-nightly-h100.yml @@ -137,6 +137,7 @@ jobs: docker-image: ${{ needs.build.outputs.docker-image }} test-matrix: ${{ needs.build.outputs.test-matrix }} timeout-minutes: 720 + # disable monitor in perf tests, next step is to enable it disable-monitor: false monitor-log-interval: 15 monitor-data-collect-interval: 4 @@ -153,6 +154,7 @@ jobs: docker-image: ${{ needs.build.outputs.docker-image }} test-matrix: ${{ needs.build.outputs.test-matrix }} timeout-minutes: 1440 + # disable monitor in perf tests, next step is to enable it disable-monitor: false monitor-log-interval: 15 monitor-data-collect-interval: 4 @@ -171,6 +173,7 @@ jobs: docker-image: ${{ needs.build.outputs.docker-image }} test-matrix: ${{ needs.build.outputs.test-matrix }} timeout-minutes: 720 + # disable monitor in perf tests for more investigation disable-monitor: false monitor-log-interval: 15 monitor-data-collect-interval: 4 diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml index a5b05d0d358c2..21d965eaeaada 100644 --- a/.github/workflows/inductor-periodic.yml +++ b/.github/workflows/inductor-periodic.yml @@ -36,8 +36,6 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: - # More memory is needed to build torchao - runner: linux.2xlarge.memory runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" 
build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks @@ -130,8 +128,6 @@ jobs: needs: - get-default-label-prefix with: - # More memory is needed to build torchao - runner: linux.2xlarge.memory runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm80 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index a1367991e6c6d..4189d24a7b14f 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -49,8 +49,6 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - # More memory is needed to build torchao - runner: linux.2xlarge.memory build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.6' From d033d11d26b321471bc7b5186d1e21a3084a0963 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 10 Sep 2025 20:13:44 +0000 Subject: [PATCH 059/693] Revert "[torch][c10d] fix split_group in mixed backend case (#162424)" This reverts commit 2dc26131801a430e030a773c4fbfe874e263259d. Reverted https://github.com/pytorch/pytorch/pull/162424 on behalf of https://github.com/clee2000 due to failure seems related, maybe a hang/timeout distributed/test_distributed_spawn.py::TestDistBackendWithSpawn::test_ddp_model_diff_shape_across_ranks log classifier is pointing at the wrong line ([comment](https://github.com/pytorch/pytorch/pull/162424#issuecomment-3276360494)) --- test/distributed/test_c10d_nccl.py | 56 ------------------- torch/csrc/distributed/c10d/ProcessGroup.hpp | 4 +- .../distributed/c10d/ProcessGroupGloo.cpp | 40 +------------ .../distributed/c10d/ProcessGroupGloo.hpp | 3 - torch/csrc/distributed/c10d/init.cpp | 28 +++++++++- torch/distributed/distributed_c10d.py | 6 +- 6 files changed, 30 insertions(+), 107 deletions(-) diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index 0d55845228da7..b234c907a6658 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -1087,62 +1087,6 @@ def test_comm_split_group(self): dist.destroy_process_group() - @requires_nccl_version((2, 18), "Need NCCL 2.18+ for ncclCommSplit") - @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") - def test_comm_split_group_mixed_backend(self): - # Test `ncclCommSplit` for smaller subgroups of the world when - # we've passed a specific device_id to init_process_group. 
- store = c10d.FileStore(self.file_name, self.world_size) - device = torch.device(f"cuda:{self.rank}") - # pg = self._create_process_group_nccl(store, self.opts(), device_id=device) - # create nccl processgroup with opts - c10d.init_process_group( - "cpu:gloo,cuda:nccl", - world_size=self.world_size, - rank=self.rank, - store=store, - pg_options=self.opts(), - device_id=device, - ) - pg = c10d.distributed_c10d._get_default_group() - backend = pg._get_backend(torch.device(device)) - - cuda_tensor = torch.full((1,), self.rank).cuda(device) - cpu_tensor = torch.full((1,), self.rank) - # Create subgroup between ranks 0, 1 - subg_ranks = [0, 1] - ng1 = c10d.split_group(pg, [subg_ranks]) - backend1 = ng1._get_backend(torch.device(device)) - - # check basic options are the same between parent and child - self.assertEqual(backend.options._timeout, backend1.options._timeout) - self.assertEqual( - backend.options.is_high_priority_stream, - backend1.options.is_high_priority_stream, - ) - self.assertEqual(ng1.group_desc, "default_pg:split:0") - - # comm split happens eagerly since device_id is passed to init_process_group. - self.assertEqual(backend.comm_split_count(), 1) - # dist.get_process_group_ranks returns the global ranks in the subgroup. - self.assertEqual( - dist.get_process_group_ranks(ng1), - subg_ranks if self.rank in subg_ranks else [], - ) - - # is part of ng1; otherwise, -1 - if dist.get_rank(ng1) >= 0: - dist.broadcast(cuda_tensor, dist.get_global_rank(ng1, 0), group=ng1) - self.assertEqual(cuda_tensor, torch.full((1,), 0)) - dist.broadcast(cpu_tensor, dist.get_global_rank(ng1, 0), group=ng1) - self.assertEqual(cpu_tensor, torch.full((1,), 0)) - - ng2 = c10d.split_group(pg, [subg_ranks]) - self.assertEqual(ng2.group_desc, "default_pg:split:1") - self.assertEqual(backend.comm_split_count(), 2) - - dist.destroy_process_group() - @requires_nccl_version((2, 18), "Need NCCL 2.18+ for ncclCommSplit") @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_non_blocking_init(self): diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp index 5a06a386d5ca8..4fb2d566e9a76 100644 --- a/torch/csrc/distributed/c10d/ProcessGroup.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp @@ -1015,9 +1015,7 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder { // Backend classes for this ProcessGroup std::unordered_set deviceTypes_; - // This mapping is ordered, as splitGroup must call split on the underlying - // backends in a consistent order. - std::map deviceTypeToBackendType_; + std::unordered_map deviceTypeToBackendType_; std::unordered_map> deviceTypeToBackend_; std::unordered_map> diff --git a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp index 74063ff579e80..fbd8a403b97dc 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp +++ b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp @@ -551,32 +551,6 @@ std::shared_ptr<::gloo::transport::Device> ProcessGroupGloo:: static std::atomic process_group_id = 0; -c10::intrusive_ptr ProcessGroupGloo::Options:: - create_default(std::chrono::milliseconds timeout) { - auto options = ::c10d::ProcessGroupGloo::Options::create(); - bool lazyInit = ::c10d::getDefaultGlooLazyInit(); - - // Use interfaces listed in "GLOO_SOCKET_IFNAME", if set. 
- auto ifnameEnv = c10::utils::get_env("GLOO_SOCKET_IFNAME"); - if (ifnameEnv && ifnameEnv->size() > 1) { - for (const auto& iface : ::c10d::split(',', ifnameEnv->c_str())) { - options->devices.push_back( - ::c10d::ProcessGroupGloo::createDeviceForInterface(iface, lazyInit)); - } - } else { - // If no hostname is specified, this function looks up - // the machine's hostname and returns a device instance - // associated with the address that the hostname resolves to. - options->devices.push_back( - ::c10d::ProcessGroupGloo::createDefaultDevice(lazyInit)); - } - - options->timeout = timeout; - // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) - options->threads = options->devices.size() * 2; - return options; -} - ProcessGroupGloo::ProcessGroupGloo( const c10::intrusive_ptr& store, int rank, @@ -736,12 +710,7 @@ c10::intrusive_ptr ProcessGroupGloo::split( } auto glooOpts = c10::dynamic_intrusive_pointer_cast(opts); - if (glooOpts == nullptr) { - TORCH_WARN_ONCE( - "Tried to pass options to ProcessGroupGloo::split that are not ProcessGroupGloo::Options." - "Falling back to default options."); - glooOpts = ProcessGroupGloo::Options::create_default(); - } + TORCH_CHECK(glooOpts != nullptr, "opts not a ProcessGroupGloo::Options."); // TODO: we need to get rid of globalRanksInGroup eventually. std::vector globalRanksInGroup; @@ -760,12 +729,7 @@ c10::intrusive_ptr ProcessGroupGloo::merge( const int& rank, const int& size) { auto glooOpts = c10::dynamic_intrusive_pointer_cast(opts); - if (glooOpts == nullptr) { - TORCH_WARN_ONCE( - "Tried to pass options to ProcessGroupGloo::merge that are not ProcessGroupGloo::Options." - "Falling back to default options."); - glooOpts = ProcessGroupGloo::Options::create_default(); - } + TORCH_CHECK(glooOpts != nullptr, "opts not a ProcessGroupGloo::Options."); auto pg = c10::make_intrusive( store->clone(), rank, size, glooOpts); return c10::static_intrusive_pointer_cast(pg); diff --git a/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp b/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp index b2cc6993528bf..4297807f2e8b9 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp @@ -255,9 +255,6 @@ class TORCH_API ProcessGroupGloo : public Backend { return c10::make_intrusive(timeout); } - static c10::intrusive_ptr create_default( - std::chrono::milliseconds timeout = kBackendDefaultTimeout); - std::vector> devices; int threads; }; diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index c36b9025dfecc..0189326683585 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -3103,6 +3103,8 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`). .def_readwrite("group_name", &::c10d::Backend::Options::group_name); #ifdef USE_C10D_GLOO + static const std::string GLOO_SOCKET_IFNAME_ENV = "GLOO_SOCKET_IFNAME"; + auto processGroupGloo = intrusive_ptr_no_gil_destructor_class_<::c10d::ProcessGroupGloo>( module, "ProcessGroupGloo", backend); @@ -3179,9 +3181,31 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`). // https://github.com/pybind/pybind11/issues/5473 py::gil_scoped_release nogil{}; - auto options = ::c10d::ProcessGroupGloo::Options::create_default(); + auto options = ::c10d::ProcessGroupGloo::Options::create(); + bool lazyInit = ::c10d::getDefaultGlooLazyInit(); + + // Use interfaces listed in "GLOO_SOCKET_IFNAME", if set. 
+ auto ifnameEnv = + c10::utils::get_env(GLOO_SOCKET_IFNAME_ENV.c_str()); + if (ifnameEnv && ifnameEnv->size() > 1) { + for (const auto& iface : ::c10d::split(',', ifnameEnv->c_str())) { + options->devices.push_back( + ::c10d::ProcessGroupGloo::createDeviceForInterface( + iface, lazyInit)); + } + } else { + // If no hostname is specified, this function looks up + // the machine's hostname and returns a device instance + // associated with the address that the hostname resolves to. + options->devices.push_back( + ::c10d::ProcessGroupGloo::createDefaultDevice(lazyInit)); + } + + options->timeout = timeout; + // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + options->threads = options->devices.size() * 2; return c10::make_intrusive<::c10d::ProcessGroupGloo>( - store, rank, size, std::move(options)); + store, rank, size, options); }), py::arg("store"), py::arg("rank"), diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 29609404df09b..14790e5dba8af 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -5158,11 +5158,7 @@ def split_group( my_group = split_group break - # use_hashed_name is True to ensure that subgroups have unique names. - # This is needed as some backends (e.g. Gloo) use the group name as a - # PrefixStore prefix for initialization of splits. Thus, names have to be - # unique to avoid key collisions. - group_name = _process_group_name(my_group, use_hashed_name=True) + group_name = _process_group_name(my_group, use_hashed_name=False) split_pg = parent_pg.split_group( my_group, timeout=timeout, From 7e2e83cdbe532b230dee40cfe0454116c9b64710 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Wed, 10 Sep 2025 20:29:46 +0000 Subject: [PATCH 060/693] [ONNX] Update export docstring (#162622) Update export docstring to reflect the latest configuration. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162622 Approved by: https://github.com/titaiwangms --- torch/onnx/__init__.py | 148 +++++++++++++++++------------------------ 1 file changed, 60 insertions(+), 88 deletions(-) diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py index f9c955bef6d6f..10d0ba23463de 100644 --- a/torch/onnx/__init__.py +++ b/torch/onnx/__init__.py @@ -65,15 +65,10 @@ def export( f: str | os.PathLike | None = None, *, kwargs: dict[str, Any] | None = None, - export_params: bool = True, verbose: bool | None = None, input_names: Sequence[str] | None = None, output_names: Sequence[str] | None = None, opset_version: int | None = None, - dynamic_axes: Mapping[str, Mapping[int, str]] - | Mapping[str, Sequence[int]] - | None = None, - keep_initializers_as_inputs: bool = False, dynamo: bool = True, # Dynamo only options external_data: bool = True, @@ -87,6 +82,12 @@ def export( dump_exported_program: bool = False, artifacts_dir: str | os.PathLike = ".", fallback: bool = True, + # BC options + export_params: bool = True, + keep_initializers_as_inputs: bool = False, + dynamic_axes: Mapping[str, Mapping[int, str]] + | Mapping[str, Sequence[int]] + | None = None, # Deprecated options training: _C_onnx.TrainingMode = _C_onnx.TrainingMode.EVAL, operator_export_type: _C_onnx.OperatorExportTypes = _C_onnx.OperatorExportTypes.ONNX, @@ -99,7 +100,7 @@ def export( Setting ``dynamo=True`` enables the new ONNX export logic which is based on :class:`torch.export.ExportedProgram` and a more modern - set of translation logic. 
This is the recommended way to export models + set of translation logic. This is the recommended and default way to export models to ONNX. When ``dynamo=True``: @@ -109,21 +110,17 @@ def export( #. If the model is already an ExportedProgram, it will be used as-is. #. Use :func:`torch.export.export` and set ``strict=False``. #. Use :func:`torch.export.export` and set ``strict=True``. - #. Use ``draft_export`` which removes some soundness guarantees in data-dependent - operations to allow export to proceed. You will get a warning if the exporter - encounters any unsound data-dependent operation. - #. Use :func:`torch.jit.trace` to trace the model then convert to ExportedProgram. - This is the most unsound strategy but may be useful for converting TorchScript - models to ONNX. Args: model: The model to be exported. args: Example positional inputs. Any non-Tensor arguments will be hard-coded into the exported model; any Tensor arguments will become inputs of the exported model, in the order they occur in the tuple. - f: Path to the output ONNX model file. E.g. "model.onnx". + f: Path to the output ONNX model file. E.g. "model.onnx". This argument is kept for + backward compatibility. It is recommended to leave unspecified (None) + and use the returned :class:`torch.onnx.ONNXProgram` to serialize the model + to a file instead. kwargs: Optional example keyword inputs. - export_params: If false, parameters (weights) will not be exported. verbose: Whether to enable verbose logging. input_names: names to assign to the input nodes of the graph, in order. output_names: names to assign to the output nodes of the graph, in order. @@ -133,7 +130,52 @@ def export( of the runtime backend or compiler you want to run the exported model with. Leave as default (``None``) to use the recommended version, or refer to the ONNX operators documentation for more information. + dynamo: Whether to export the model with ``torch.export`` ExportedProgram instead of TorchScript. + external_data: Whether to save the model weights as an external data file. + This is required for models with large weights that exceed the ONNX file size limit (2GB). + When False, the weights are saved in the ONNX file with the model architecture. + dynamic_shapes: A dictionary or a tuple of dynamic shapes for the model inputs. Refer to + :func:`torch.export.export` for more details. This is only used (and preferred) when dynamo is True. + Note that dynamic_shapes is designed to be used when the model is exported with dynamo=True, while + dynamic_axes is used when dynamo=False. + custom_translation_table: A dictionary of custom decompositions for operators in the model. + The dictionary should have the callable target in the fx Node as the key (e.g. ``torch.ops.aten.stft.default``), + and the value should be a function that builds that graph using ONNX Script. This option + is only valid when dynamo is True. + report: Whether to generate a markdown report for the export process. This option + is only valid when dynamo is True. + optimize: Whether to optimize the exported model. This option + is only valid when dynamo is True. Default is True. + verify: Whether to verify the exported model using ONNX Runtime. This option + is only valid when dynamo is True. + profile: Whether to profile the export process. This option + is only valid when dynamo is True. + dump_exported_program: Whether to dump the :class:`torch.export.ExportedProgram` to a file. + This is useful for debugging the exporter. This option is only valid when dynamo is True. 
+ artifacts_dir: The directory to save the debugging artifacts like the report and the serialized + exported program. This option is only valid when dynamo is True. + fallback: Whether to fallback to the TorchScript exporter if the dynamo exporter fails. + This option is only valid when dynamo is True. When fallback is enabled, It is + recommended to set dynamic_axes even when dynamic_shapes is provided. + export_params: **When ``f`` is specified**: If false, parameters (weights) will not be exported. + + You can also leave it unspecified and use the returned :class:`torch.onnx.ONNXProgram` + to control how initializers are treated when serializing the model. + keep_initializers_as_inputs: **When ``f`` is specified**: If True, all the + initializers (typically corresponding to model weights) in the + exported graph will also be added as inputs to the graph. If False, + then initializers are not added as inputs to the graph, and only + the user inputs are added as inputs. + + Set this to True if you intend to supply model weights at runtime. + Set it to False if the weights are static to allow for better optimizations + (e.g. constant folding) by backends/runtimes. + + You can also leave it unspecified and use the returned :class:`torch.onnx.ONNXProgram` + to control how initializers are treated when serializing the model. dynamic_axes: + Prefer specifying ``dynamic_shapes`` when ``dynamo=True`` and when ``fallback`` + is not enabled. By default the exported model will have the shapes of all input and output tensors set to exactly match those given in ``args``. To specify axes of tensors as @@ -215,84 +257,12 @@ def forward(self, x): dim_param: "sum_dynamic_axes_1" # axis 0 ... - keep_initializers_as_inputs: If True, all the - initializers (typically corresponding to model weights) in the - exported graph will also be added as inputs to the graph. If False, - then initializers are not added as inputs to the graph, and only - the user inputs are added as inputs. - - Set this to True if you intend to supply model weights at runtime. - Set it to False if the weights are static to allow for better optimizations - (e.g. constant folding) by backends/runtimes. - - dynamo: Whether to export the model with ``torch.export`` ExportedProgram instead of TorchScript. - external_data: Whether to save the model weights as an external data file. - This is required for models with large weights that exceed the ONNX file size limit (2GB). - When False, the weights are saved in the ONNX file with the model architecture. - dynamic_shapes: A dictionary or a tuple of dynamic shapes for the model inputs. Refer to - :func:`torch.export.export` for more details. This is only used (and preferred) when dynamo is True. - Note that dynamic_shapes is designed to be used when the model is exported with dynamo=True, while - dynamic_axes is used when dynamo=False. - custom_translation_table: A dictionary of custom decompositions for operators in the model. - The dictionary should have the callable target in the fx Node as the key (e.g. ``torch.ops.aten.stft.default``), - and the value should be a function that builds that graph using ONNX Script. This option - is only valid when dynamo is True. - report: Whether to generate a markdown report for the export process. This option - is only valid when dynamo is True. - optimize: Whether to optimize the exported model. This option - is only valid when dynamo is True. Default is True. - verify: Whether to verify the exported model using ONNX Runtime. 
This option - is only valid when dynamo is True. - profile: Whether to profile the export process. This option - is only valid when dynamo is True. - dump_exported_program: Whether to dump the :class:`torch.export.ExportedProgram` to a file. - This is useful for debugging the exporter. This option is only valid when dynamo is True. - artifacts_dir: The directory to save the debugging artifacts like the report and the serialized - exported program. This option is only valid when dynamo is True. - fallback: Whether to fallback to the TorchScript exporter if the dynamo exporter fails. - This option is only valid when dynamo is True. When fallback is enabled, It is - recommended to set dynamic_axes even when dynamic_shapes is provided. - training: Deprecated option. Instead, set the training mode of the model before exporting. operator_export_type: Deprecated option. Only ONNX is supported. do_constant_folding: Deprecated option. - custom_opsets: Deprecated. - A dictionary: - - * KEY (str): opset domain name - * VALUE (int): opset version - - If a custom opset is referenced by ``model`` but not mentioned in this dictionary, - the opset version is set to 1. Only custom opset domain name and version should be - indicated through this argument. + custom_opsets: Deprecated option. export_modules_as_functions: Deprecated option. - - Flag to enable - exporting all ``nn.Module`` forward calls as local functions in ONNX. Or a set to indicate the - particular types of modules to export as local functions in ONNX. - This feature requires ``opset_version`` >= 15, otherwise the export will fail. This is because - ``opset_version`` < 15 implies IR version < 8, which means no local function support. - Module variables will be exported as function attributes. There are two categories of function - attributes. - - 1. Annotated attributes: class variables that have type annotations via - `PEP 526-style `_ - will be exported as attributes. - Annotated attributes are not used inside the subgraph of ONNX local function because - they are not created by PyTorch JIT tracing, but they may be used by consumers - to determine whether or not to replace the function with a particular fused kernel. - - 2. Inferred attributes: variables that are used by operators inside the module. Attribute names - will have prefix "inferred::". This is to differentiate from predefined attributes retrieved from - python module annotations. Inferred attributes are used inside the subgraph of ONNX local function. - - * ``False`` (default): export ``nn.Module`` forward calls as fine grained nodes. - * ``True``: export all ``nn.Module`` forward calls as local function nodes. - * Set of type of nn.Module: export ``nn.Module`` forward calls as local function nodes, - only if the type of the ``nn.Module`` is found in the set. - autograd_inlining: Deprecated. - Flag used to control whether to inline autograd functions. - Refer to https://github.com/pytorch/pytorch/pull/74765 for more details. + autograd_inlining: Deprecated option. Returns: :class:`torch.onnx.ONNXProgram` if dynamo is True, otherwise None. @@ -305,6 +275,8 @@ def forward(self, x): *autograd_inlining* is now deprecated. .. versionchanged:: 2.7 *optimize* is now True by default. + .. versionchanged:: 2.9 + *dynamo* is now True by default. 
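A minimal usage sketch consistent with the updated documentation (the module and file name are placeholders):

    import torch

    class M(torch.nn.Module):
        def forward(self, x):
            return x.relu() + 1

    # dynamo=True is the default; leaving f unspecified returns an ONNXProgram
    onnx_program = torch.onnx.export(M(), (torch.randn(2, 3),))
    onnx_program.save("model.onnx")
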
""" if dynamo is True or isinstance(model, torch.export.ExportedProgram): from torch.onnx._internal.exporter import _compat From 053251b98da69bacb916c9910c0c77f09ed3fe07 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 10 Sep 2025 20:45:48 +0000 Subject: [PATCH 061/693] Revert "Make functorch notebook symlinks PEP 517 valid (#157813)" This reverts commit b494547f0bd6cb1ce5d8d104cb419802434c9c08. Reverted https://github.com/pytorch/pytorch/pull/157813 on behalf of https://github.com/huydhn due to Sorry for reverting your change, but this surfaces a weird discrepancy between GitHub and Mecurial used internally ([comment](https://github.com/pytorch/pytorch/pull/157813#issuecomment-3276442242)) --- functorch/docs/source/notebooks | 1 + functorch/notebooks | 1 - functorch/{docs/source => }/notebooks/_src/plot_ensembling.py | 0 .../source => }/notebooks/_src/plot_jacobians_and_hessians.py | 0 .../source => }/notebooks/_src/plot_per_sample_gradients.py | 0 .../{docs/source => }/notebooks/aot_autograd_optimizations.ipynb | 0 functorch/{docs/source => }/notebooks/ensembling.ipynb | 0 functorch/{docs/source => }/notebooks/jacobians_hessians.ipynb | 0 functorch/{docs/source => }/notebooks/minifier.ipynb | 0 .../{docs/source => }/notebooks/neural_tangent_kernels.ipynb | 0 functorch/{docs/source => }/notebooks/per_sample_grads.ipynb | 0 functorch/{docs/source => }/notebooks/whirlwind_tour.ipynb | 0 12 files changed, 1 insertion(+), 1 deletion(-) create mode 120000 functorch/docs/source/notebooks delete mode 120000 functorch/notebooks rename functorch/{docs/source => }/notebooks/_src/plot_ensembling.py (100%) rename functorch/{docs/source => }/notebooks/_src/plot_jacobians_and_hessians.py (100%) rename functorch/{docs/source => }/notebooks/_src/plot_per_sample_gradients.py (100%) rename functorch/{docs/source => }/notebooks/aot_autograd_optimizations.ipynb (100%) rename functorch/{docs/source => }/notebooks/ensembling.ipynb (100%) rename functorch/{docs/source => }/notebooks/jacobians_hessians.ipynb (100%) rename functorch/{docs/source => }/notebooks/minifier.ipynb (100%) rename functorch/{docs/source => }/notebooks/neural_tangent_kernels.ipynb (100%) rename functorch/{docs/source => }/notebooks/per_sample_grads.ipynb (100%) rename functorch/{docs/source => }/notebooks/whirlwind_tour.ipynb (100%) diff --git a/functorch/docs/source/notebooks b/functorch/docs/source/notebooks new file mode 120000 index 0000000000000..d4082256dcfe3 --- /dev/null +++ b/functorch/docs/source/notebooks @@ -0,0 +1 @@ +../../notebooks/ \ No newline at end of file diff --git a/functorch/notebooks b/functorch/notebooks deleted file mode 120000 index cacf787bdade8..0000000000000 --- a/functorch/notebooks +++ /dev/null @@ -1 +0,0 @@ -docs/source/notebooks \ No newline at end of file diff --git a/functorch/docs/source/notebooks/_src/plot_ensembling.py b/functorch/notebooks/_src/plot_ensembling.py similarity index 100% rename from functorch/docs/source/notebooks/_src/plot_ensembling.py rename to functorch/notebooks/_src/plot_ensembling.py diff --git a/functorch/docs/source/notebooks/_src/plot_jacobians_and_hessians.py b/functorch/notebooks/_src/plot_jacobians_and_hessians.py similarity index 100% rename from functorch/docs/source/notebooks/_src/plot_jacobians_and_hessians.py rename to functorch/notebooks/_src/plot_jacobians_and_hessians.py diff --git a/functorch/docs/source/notebooks/_src/plot_per_sample_gradients.py b/functorch/notebooks/_src/plot_per_sample_gradients.py similarity index 100% rename from 
functorch/docs/source/notebooks/_src/plot_per_sample_gradients.py rename to functorch/notebooks/_src/plot_per_sample_gradients.py diff --git a/functorch/docs/source/notebooks/aot_autograd_optimizations.ipynb b/functorch/notebooks/aot_autograd_optimizations.ipynb similarity index 100% rename from functorch/docs/source/notebooks/aot_autograd_optimizations.ipynb rename to functorch/notebooks/aot_autograd_optimizations.ipynb diff --git a/functorch/docs/source/notebooks/ensembling.ipynb b/functorch/notebooks/ensembling.ipynb similarity index 100% rename from functorch/docs/source/notebooks/ensembling.ipynb rename to functorch/notebooks/ensembling.ipynb diff --git a/functorch/docs/source/notebooks/jacobians_hessians.ipynb b/functorch/notebooks/jacobians_hessians.ipynb similarity index 100% rename from functorch/docs/source/notebooks/jacobians_hessians.ipynb rename to functorch/notebooks/jacobians_hessians.ipynb diff --git a/functorch/docs/source/notebooks/minifier.ipynb b/functorch/notebooks/minifier.ipynb similarity index 100% rename from functorch/docs/source/notebooks/minifier.ipynb rename to functorch/notebooks/minifier.ipynb diff --git a/functorch/docs/source/notebooks/neural_tangent_kernels.ipynb b/functorch/notebooks/neural_tangent_kernels.ipynb similarity index 100% rename from functorch/docs/source/notebooks/neural_tangent_kernels.ipynb rename to functorch/notebooks/neural_tangent_kernels.ipynb diff --git a/functorch/docs/source/notebooks/per_sample_grads.ipynb b/functorch/notebooks/per_sample_grads.ipynb similarity index 100% rename from functorch/docs/source/notebooks/per_sample_grads.ipynb rename to functorch/notebooks/per_sample_grads.ipynb diff --git a/functorch/docs/source/notebooks/whirlwind_tour.ipynb b/functorch/notebooks/whirlwind_tour.ipynb similarity index 100% rename from functorch/docs/source/notebooks/whirlwind_tour.ipynb rename to functorch/notebooks/whirlwind_tour.ipynb From 6d65737aeefb8c6c883efc828ae0ae6afa68ccf9 Mon Sep 17 00:00:00 2001 From: Tugsbayasgalan Manlaibaatar Date: Wed, 10 Sep 2025 08:36:36 -0700 Subject: [PATCH 062/693] testing infra and some fixes (#162183) This PR is quite large in that it covers most of rough edges in the new strict export flow: 1. Handle nn_module_stack correctly now that we are tracing wrapper module 2. module_call_spec needs to get queried from source directly because we are not running the bytecode anymore. 3. Correct input and output handling. 
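
For illustration only, here is a minimal sketch of opting into the new tracer via the private `_use_new_tracer_experimental` flag this PR adds to `torch.export.export`; the toy module, shapes, and the metadata inspection below are illustrative assumptions, not part of this PR's test plan:

```python
import torch


class Toy(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(4, 4)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.relu(self.linear(x))


# strict=True plus the experimental flag routes export through the new
# dynamo graph capture path instead of torch._dynamo.export.
ep = torch.export.export(
    Toy(),
    (torch.randn(2, 4),),
    strict=True,
    _use_new_tracer_experimental=True,
)

# Inspect nn_module_stack metadata; per the cleanup in this PR, the
# export-root wrapper entries should no longer appear in the stacks.
for node in ep.graph.nodes:
    if "nn_module_stack" in node.meta:
        print(node.name, list(node.meta["nn_module_stack"]))
```
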
@diff-train-skip-merge Pull Request resolved: https://github.com/pytorch/pytorch/pull/162183 Approved by: https://github.com/zhxchen17 --- test/export/test_export.py | 23 +- test/export/test_strict_export_v2.py | 54 ++++ test/export/testing.py | 6 + torch/_dynamo/aot_compile.py | 1 + torch/_dynamo/convert_frame.py | 11 +- torch/_dynamo/functional_export.py | 428 +++++++++++++++++++++++---- torch/_dynamo/output_graph.py | 17 ++ torch/export/__init__.py | 4 + torch/export/_trace.py | 55 ++-- torch/export/_unlift.py | 25 +- torch/export/exported_program.py | 5 +- torch/fx/graph_module.py | 6 + 12 files changed, 549 insertions(+), 86 deletions(-) create mode 100644 test/export/test_strict_export_v2.py diff --git a/test/export/test_export.py b/test/export/test_export.py index ed19fbe2bec81..c3bf82c8fe906 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -214,12 +214,19 @@ class Inp3: TRAINING_IR_DECOMP_NON_STRICT_SUFFIX = "_training_ir_to_decomp_nonstrict" CPP_RUNTIME_STRICT_SUFFIX = "_cpp_runtime_strict" CPP_RUNTIME_NONSTRICT_SUFFIX = "_cpp_runtime_nonstrict" +STRICT_EXPORT_V2_SUFFIX = "_strict_export_v2" # Now default mode is non strict, so original unammended test names # should be treated as non-strict def is_non_strict_test(test_name): - return not test_name.endswith(STRICT_SUFFIX) + return not test_name.endswith(STRICT_SUFFIX) and not test_name.endswith( + STRICT_EXPORT_V2_SUFFIX + ) + + +def is_strict_v2_test(test_name): + return test_name.endswith(STRICT_EXPORT_V2_SUFFIX) def is_inline_and_install_strict_test(test_name: str) -> bool: @@ -11759,6 +11766,7 @@ def forward(self, x, y): self.assertEqual(ep.module()(3, 5), 8) self.assertEqual(ep.module()(5, 4), 9) + @testing.expectedFailureStrictV2 # ValueError: Found conflicts between user-specified and inferred ranges def test_dynamic_shapes_bounds(self): class M(torch.nn.Module): """ @@ -12065,6 +12073,8 @@ def test(ep, swap=None): test(export(M(), inp)) + # Preserving signature hook is messing with dynamo tracing + @testing.expectedFailureStrictV2 def test_unflatten_multiple_graphs_state(self): class N(torch.nn.Module): def __init__(self): @@ -13683,7 +13693,7 @@ def forward(self, x): inputs = (torch.randn(10, 72),) dx, dy = dims("dx", "dy") - ep = torch.export.export( + ep = torch.export._trace._export( Mod4Reshape(), inputs, dynamic_shapes={"x": (dx, dy)}, @@ -14531,6 +14541,14 @@ def forward(self, x, y): if is_inline_and_install_strict_test(self._testMethodName): self.assertEqual(filtered_nn_module_stack[0], "mod_list_1.2") self.assertEqual(filtered_nn_module_stack[1], "mod_list_1.2") + # This is fine since both of these will be deprecated soon. 
+ elif is_strict_v2_test(self._testMethodName) and IS_FBCODE: + self.assertEqual( + filtered_nn_module_stack[0], "mod_list_1.slice(2, 3, None).0" + ) + self.assertEqual( + filtered_nn_module_stack[1], "mod_list_2.slice(4, 5, None).0" + ) else: self.assertEqual( filtered_nn_module_stack[0], "mod_list_1.slice(2, 3, None).2" @@ -15369,6 +15387,7 @@ def forward(self, arg0_1: "f32[2, 4]", arg1_1: "f32[4]"): ) @testing.expectedFailureStrict # test_hop doesn't have a dynamo implementation + @testing.expectedFailureStrictV2 # test_hop doesn't have a dynamo implementation @testing.expectedFailureRetraceability # test_hop doesn't have a dynamo implementation @testing.expectedFailureTrainingIRToRunDecomp # test_hop doesn't have a dynamo implementation @testing.expectedFailureSerDerNonStrict # TODO: serde torch.FunctionSchema is not implemented yet diff --git a/test/export/test_strict_export_v2.py b/test/export/test_strict_export_v2.py new file mode 100644 index 0000000000000..3b162150695f5 --- /dev/null +++ b/test/export/test_strict_export_v2.py @@ -0,0 +1,54 @@ +# Owner(s): ["oncall: export"] + +try: + from . import test_export, testing +except ImportError: + import test_export # @manual=fbcode//caffe2/test:test_export-library + import testing # @manual=fbcode//caffe2/test:test_export-library + +from torch.export import export + + +test_classes = {} + + +def mocked_strict_export_v2(*args, **kwargs): + # If user already specified strict, don't make it strict + if "strict" in kwargs: + if kwargs["strict"]: + return export(*args, **kwargs, _use_new_tracer_experimental=True) + else: + return export(*args, **kwargs) + return export(*args, **kwargs, strict=True, _use_new_tracer_experimental=True) + + +def make_dynamic_cls(cls): + cls_prefix = "StrictExportV2" + + test_class = testing.make_test_cls_with_mocked_export( + cls, + cls_prefix, + test_export.STRICT_EXPORT_V2_SUFFIX, + mocked_strict_export_v2, + xfail_prop="_expected_failure_strict_v2", + ) + + test_classes[test_class.__name__] = test_class + # REMOVING THIS LINE WILL STOP TESTS FROM RUNNING + globals()[test_class.__name__] = test_class + test_class.__module__ = __name__ + return test_class + + +tests = [ + test_export.TestDynamismExpression, + test_export.TestExport, +] +for test in tests: + make_dynamic_cls(test) +del test + +if __name__ == "__main__": + from torch._dynamo.test_case import run_tests + + run_tests() diff --git a/test/export/testing.py b/test/export/testing.py index 7ff198d72e780..cfa29cf693dea 100644 --- a/test/export/testing.py +++ b/test/export/testing.py @@ -257,6 +257,12 @@ def expectedFailureTrainingIRToRunDecompNonStrict(fn): return fn +# Controls tests generated in test/export/test_export_strict_v2.py +def expectedFailureStrictV2(fn): + fn._expected_failure_strict_v2 = True + return fn + + # Controls tests generated in test/export/test_export_strict.py def expectedFailureStrict(fn): fn._expected_failure_strict = True diff --git a/torch/_dynamo/aot_compile.py b/torch/_dynamo/aot_compile.py index 0482016846283..a454c51a33e35 100644 --- a/torch/_dynamo/aot_compile.py +++ b/torch/_dynamo/aot_compile.py @@ -255,6 +255,7 @@ def new_guard_filter_fn( assert check_fn.guards_state is not None backend_input = capture_output.backend_input + assert backend_input is not None backend_input.graph_module._backend_id = backend_input.backend_id # type: ignore[assignment] output_graph = dynamo_output.tracer_output.output_graph assert output_graph is not None diff --git a/torch/_dynamo/convert_frame.py b/torch/_dynamo/convert_frame.py index 
686f0945179f3..ef00676e67748 100644 --- a/torch/_dynamo/convert_frame.py +++ b/torch/_dynamo/convert_frame.py @@ -95,6 +95,7 @@ ) from .eval_frame import ( always_optimize_code_objects, + Constraint, dynamo_tls, skip_code, TorchPatcher, @@ -894,7 +895,8 @@ class CaptureOutput: """ dynamo_output: DynamoOutput - backend_input: BackendInput + # BackendInput can be None when dynamo didn't compile any graph (no tensor op) + backend_input: Optional[BackendInput] @dataclass @@ -907,7 +909,10 @@ class FrameInfo: def fullgraph_capture( - frame: FrameInfo, *, _is_export_deprecated_do_not_use: bool = False + frame: FrameInfo, + *, + constraints: Optional[list[Constraint]] = None, + _is_export_deprecated_do_not_use: bool = False, ) -> CaptureOutput: """ A standalone function which takes a frame and returns dynamo captured graph @@ -951,6 +956,7 @@ def fullgraph_compiler( frame.closure, compiler_fn=fullgraph_compiler, export=_is_export_deprecated_do_not_use, + export_constraints=constraints, # type: ignore[arg-type] one_graph=True, restart_reasons=set(), ) @@ -966,7 +972,6 @@ def fullgraph_compiler( cur_exn = cur_exn.__cause__ raise e.with_traceback(None) from e.__cause__ # User compiler error - assert backend_input is not None return CaptureOutput(dynamo_output, backend_input) diff --git a/torch/_dynamo/functional_export.py b/torch/_dynamo/functional_export.py index 228dd7924aa3a..037577998ac22 100644 --- a/torch/_dynamo/functional_export.py +++ b/torch/_dynamo/functional_export.py @@ -1,17 +1,70 @@ import builtins import inspect +import logging +import traceback from collections import namedtuple -from typing import Any, Callable +from typing import Any, Callable, Optional, Union + +import sympy import torch +import torch.fx import torch.utils._pytree as pytree from torch._dynamo.convert_frame import FrameInfo, fullgraph_capture, get_compile_id from torch._dynamo.eval_frame import argument_names from torch._dynamo.utils import dynamo_timed, get_metrics_context from torch._guards import compile_context, CompileContext +from torch.export.dynamic_shapes import _RelaxedConstraint, Constraint +from torch.fx import Node +from torch.fx.experimental.symbolic_shapes import ( + ConstraintViolationError, + DimDynamic, + StatelessSymbolicContext, +) from torch.fx.graph import _PyTreeCodeGen, _PyTreeInfo +log = logging.getLogger(__name__) + + +def clean_nn_module_stack(graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule: + for node in graph_module.graph.nodes: + if "nn_module_stack" in node.meta: + nn_module_stack = node.meta["nn_module_stack"].copy() + first_key = next(iter(nn_module_stack.keys())) + if "export_root" in first_key: + del nn_module_stack[first_key] + nn_module_stack_corrected = {} + for k, v in nn_module_stack.items(): + k_new = "".join(k.split("__export_root")) + child_name, child_class = v + child_name = child_name.replace("._export_root", "") + nn_module_stack_corrected[k_new] = (child_name, child_class) + node.meta["nn_module_stack"] = nn_module_stack_corrected + return graph_module + + +def clean_export_root(graph_module: torch.fx.GraphModule) -> None: + """Remove export_root artifacts from FX graph in-place""" + + # Clean parameter names: L__self____export_root_param -> L__self___param + def clean_name(name) -> str: + return name.replace("__export_root_", "_") if "__export_root_" in name else name + + # Update get_attr nodes in-place + for node in graph_module.graph.nodes: + if node.op == "get_attr": + old_target = node.target + new_target = clean_name(old_target) + if new_target != 
old_target: + node.target = new_target + # Move the parameter to the new name + if hasattr(graph_module, old_target): + param = torch.fx.graph_module._get_attr(graph_module, old_target) + torch.fx.graph_module._set_attr(graph_module, new_target, param) + torch.fx.graph_module._del_attr(graph_module, old_target) + + class ModuleToTrace(torch.nn.Module): def __init__(self, foo: Any, in_spec: Any) -> None: super().__init__() @@ -28,29 +81,214 @@ def forward(self, *flat_args: Any) -> "ExportTracerOutput": ExportTracerOutput = namedtuple("ExportTracerOutput", ["flat_args", "out_spec"]) +# mypy: disable-error-code="no-untyped-def,var-annotated,assignment,index,operator" +class DynamoGraphTransformer(torch.fx.Transformer): + """Graph transformer for dynamo export that flattens inputs/outputs without complex matching.""" + + def __init__( + self, + module: torch.fx.GraphModule, + flat_inputs: list[Any], + flat_args_dynamic_dims: list[set[int]], + graph_input_order: dict[int, int], + graph_output_map: dict[int, tuple[str, Any]], + fake_mode: Optional[Any] = None, + ) -> None: + super().__init__(module) + + assert len(flat_args_dynamic_dims) == len(flat_inputs) + + self.flat_inputs = flat_inputs + self.flat_args_dynamic_dims = flat_args_dynamic_dims + self.graph_input_order = graph_input_order + self.graph_output_map = graph_output_map + self.fake_mode = fake_mode + + # Get original placeholders and output + self.placeholders = [n for n in module.graph.nodes if n.op == "placeholder"] + self.output_node = next(n for n in module.graph.nodes if n.op == "output") + + # Create new flattened input placeholders + self.new_input_nodes: dict[int, torch.fx.Node] = {} + self._create_flattened_inputs() + + # Iterator for replacing old placeholders + self.old_to_new_mapping = {} + self._create_placeholder_mapping() + + def _create_flattened_inputs(self) -> None: + """Create new placeholder nodes for flattened inputs with proper fake tensors.""" + for i in range(len(self.flat_inputs)): + placeholder = super().placeholder(f"arg_{i}", (), {}) + + # Check if this user input (index i) maps to a graph placeholder + if i in self.graph_input_order: + # graph_input_order[i] gives us which graph placeholder this user input corresponds to + graph_placeholder_idx = self.graph_input_order[i] + if graph_placeholder_idx < len(self.placeholders): + orig_placeholder = self.placeholders[graph_placeholder_idx] + # Copy other metadata but not "val" yet + for key, value in orig_placeholder.meta.items(): + if key != "val": + placeholder.node.meta[key] = value + + # Always ensure we have proper "val" metadata from fake tensor + if self.fake_mode is not None and isinstance( + self.flat_inputs[i], torch.Tensor + ): + placeholder.node.meta["val"] = self.fake_mode.from_tensor( + self.flat_inputs[i], + symbolic_context=StatelessSymbolicContext( + dynamic_sizes=[ + ( + DimDynamic.DYNAMIC + if d in self.flat_args_dynamic_dims[i] + else DimDynamic.STATIC + ) + for d in range(len(self.flat_inputs[i].shape)) + ], + constraint_sizes=[None] * len(self.flat_inputs[i].shape), + ), + ) + elif hasattr(self.flat_inputs[i], "val"): # _IntWrapper case + placeholder.node.meta["val"] = self.flat_inputs[i].val + else: + placeholder.node.meta["val"] = self.flat_inputs[i] + + self.new_input_nodes[i] = placeholder + + def _create_placeholder_mapping(self) -> None: + """Create mapping from old placeholders to new ones.""" + # graph_input_order maps: user_input_index -> graph_placeholder_index + # We need to create: old_graph_placeholder -> 
new_user_input_placeholder + for user_input_idx, graph_placeholder_idx in self.graph_input_order.items(): + if graph_placeholder_idx < len(self.placeholders): + old_placeholder = self.placeholders[graph_placeholder_idx] + new_placeholder = self.new_input_nodes[user_input_idx] + self.old_to_new_mapping[old_placeholder] = new_placeholder + + def placeholder(self, target, args, kwargs) -> Any: + """Replace old placeholders with new flattened ones.""" + # Return the corresponding new placeholder + if self.current_node in self.old_to_new_mapping: + new_arg = self.old_to_new_mapping[self.current_node] + + # Copy over additional metadata from current node, but don't overwrite "val" + for key in ["tensor_dict", "example_value", "unbacked_bindings"]: + if key in self.current_node.meta: + new_arg.node.meta[key] = self.current_node.meta[key] + + # Only copy "val" if we don't already have a good one + if "val" in self.current_node.meta and "val" not in new_arg.node.meta: + new_arg.node.meta["val"] = self.current_node.meta["val"] + + return new_arg + else: + # Shouldn't happen if mapping is correct, but fallback + return super().placeholder(target, args, kwargs) + + def output(self, target, args, kwargs) -> Any: + """Transform output according to graph_output_map.""" + original_outputs = args[0] + + # Build new output list based on graph_output_map + new_outputs = [] + for i in sorted(self.graph_output_map.keys()): + output_type, val = self.graph_output_map[i] + + if output_type == "graph_out": + new_outputs.append(original_outputs[val]) + elif output_type == "input": + input_idx = val.index + new_outputs.append(self.new_input_nodes[input_idx]) + elif output_type == "constant": + new_outputs.append(val) + + return super().output(target, (tuple(new_outputs),), {}) + + def run_node(self, node: Node) -> Any: + """Run node transformation and preserve metadata.""" + self.current_node = node + result = super().run_node(node) + + # Copy important metadata + if hasattr(result, "node") and result.node is not node: + for key in ["val", "example_value", "unbacked_bindings"]: + if key in node.meta: + result.node.meta[key] = node.meta[key] + + # Preserve node names (except output) + if node.op != "output" and hasattr(node, "name"): + result.node._rename(node.name) + + return result + + def transform(self) -> torch.fx.GraphModule: + """Perform the graph transformation and copy module metadata.""" + result_gm = super().transform() + + # Copy module metadata like the original implementation + if hasattr(self.module, "meta"): + if "dynamo_flat_name_to_original_fqn" in self.module.meta: + result_gm.meta["dynamo_flat_name_to_original_fqn"] = self.module.meta[ + "dynamo_flat_name_to_original_fqn" + ] + if "dynamo_compile_id" in self.module.meta: + result_gm.meta["dynamo_compile_id"] = self.module.meta[ + "dynamo_compile_id" + ] + + return result_gm + + def _dynamo_graph_capture_for_export( - mod: torch.nn.Module, + mod: Callable[..., Any], + *, + constraints: Optional[list[Constraint]] = None, + dynamic_shapes: Optional[Union[dict[str, Any], tuple[Any], list[Any]]] = None, ) -> Callable[..., torch.fx.GraphModule]: """ - This is lower level API that is used for export to capture dynamo level - torch IR. + Improved dynamo graph capture using transformer approach with proper fake tensor handling. + + This function creates a capture instance that handles: + 1. PyTree flattening/unflattening with proper input ordering + 2. Dynamo graph capture with export-specific context + 3. 
FX graph transformation for export compatibility + 4. Proper fake tensor metadata preservation + 5. Dynamic dimension constraint handling - Notable TODOs: + Notable improvements over manual approach: + - Uses FX Transformer for cleaner graph manipulation + - Properly handles fake tensor metadata and dynamic dimensions + - Preserves all necessary metadata for export + - More robust error handling and edge case management + + TODO: 1. Are we actually gonna run the bytecode? 2. Need to attach guards """ + _dynamic_shapes = dynamic_shapes + _constraints = constraints + def inner(*args: Any, **kwargs: Any) -> torch.fx.GraphModule: flat_inputs, in_spec = pytree.tree_flatten((args, kwargs)) module_to_trace = ModuleToTrace(mod, in_spec) signature = inspect.signature(module_to_trace.forward) - bound_arguments = signature.bind(*flat_inputs) bound_arguments.apply_defaults() - f_locals = {"self": module_to_trace, **bound_arguments.arguments} + constraints: Optional[list[Constraint]] = _constraints + dynamic_shapes: Optional[Union[dict[str, Any], tuple[Any], list[Any]]] = ( + _dynamic_shapes + ) + + from . import reset # type: ignore[attr-defined] + + reset() + f_locals = {"self": module_to_trace, **bound_arguments.arguments} frame = FrameInfo( module_to_trace.forward.__func__.__code__, # type: ignore[attr-defined] module_to_trace.forward.__func__.__globals__, # type: ignore[attr-defined] @@ -60,7 +298,14 @@ def inner(*args: Any, **kwargs: Any) -> torch.fx.GraphModule: ) dynamo_config_ctx = torch._dynamo.config.patch( - "log_graph_in_out_metadata", True + specialize_int=True, + specialize_float=True, + assume_static_by_default=True, + automatic_dynamic_shapes=False, + capture_dynamic_output_shape_ops=True, + capture_scalar_outputs=True, + prefer_deferred_runtime_asserts_over_guards=False, + log_graph_in_out_metadata=True, ) with ( @@ -69,74 +314,137 @@ def inner(*args: Any, **kwargs: Any) -> torch.fx.GraphModule: dynamo_timed("fullgraph_capture"), dynamo_config_ctx, ): - out = fullgraph_capture(frame, _is_export_deprecated_do_not_use=True) + out = fullgraph_capture( + frame, + constraints=_constraints, + _is_export_deprecated_do_not_use=True, + ) assert out.dynamo_output.tracer_output.output_graph is not None + # Extract export metadata from the new location export_metadata = ( out.dynamo_output.tracer_output.output_graph.export_metadata ) graph_inputs = export_metadata.graph_input_idx_to_local_source - output_return_type = export_metadata.output_return_type - # We need to extract out_spec here because we are not actually running the bytecode + graph_output_map = export_metadata.output_return_type out_spec = export_metadata.out_spec + module_call_spec = export_metadata.module_call_spec + example_inputs: list[Any] = [] + if out.backend_input is not None: graph = out.backend_input.graph_module + fake_mode = out.backend_input.fake_mode + example_inputs = out.backend_input.example_inputs + else: + graph = torch.fx.GraphModule(torch.nn.Module(), torch.fx.Graph()) + graph.graph.output(None) + graph.recompile() + fake_mode = out.dynamo_output.tracer_output.output_graph.fake_mode + + # Compute dynamic dimensions for each input based on constraints + flat_args_dynamic_dims = [ + { + c.dim + for c in (constraints or ()) + if ( + c.t_id == id(x) + and not isinstance(c, _RelaxedConstraint) + and c.constraint_range.vr.lower != c.constraint_range.vr.upper + ) + } + for x in flat_inputs + ] + + # Create input order mapping from dynamo's internal order to user order + graph_input_order: dict[int, int] = {} + for inp 
in graph_inputs: + source = graph_inputs[inp] + assert isinstance(source, torch._dynamo.source.GetItemSource) + graph_input_order[source.index] = len(graph_input_order) - # It is not guaranteed that dynamo puts inputs in right order, so we need to - # map the actual user order to the dynamo order. - graph_input_order: dict[int, int] = {} - for inp in graph_inputs: - source = graph_inputs[inp] - assert isinstance(source, torch._dynamo.source.GetItemSource) - graph_input_order[source.index] = len(graph_input_order) - - placeholders = [n for n in list(graph.graph.nodes) if n.op == "placeholder"] - output = next(n for n in list(graph.graph.nodes) if n.op == "output") - # Sometimes there can be empty inputs - anchor = placeholders[0] if len(placeholders) > 0 else output - inp_to_node = {} - - with graph.graph.inserting_before(anchor): - for i in range(len(flat_inputs)): - node_new = graph.graph.placeholder(f"arg_{i}") - if i in graph_input_order: - placeholders[graph_input_order[i]] - node_new.meta = placeholders[graph_input_order[i]].meta.copy() - inp_to_node[i] = node_new - - new_args = [] - for i in output_return_type: - type, val = output_return_type[i] - if type == "graph_out": - new_args.append(output.args[0][val]) - if type == "input": - input_idx = val.index - new_args.append(inp_to_node[input_idx]) - if type == "constant": - new_args.append(val) - output.args = (tuple(new_args),) - - for src_idx, i in graph_input_order.items(): - old = placeholders[src_idx] - new = inp_to_node[i] - old.replace_all_uses_with(new) - graph.graph.erase_node(old) - - # Dynamo uses _lazyGraphModule, so we need to force recompile - from torch.fx._lazy_graph_module import _LazyGraphModule - - _LazyGraphModule.force_recompile(graph) - - graph.graph._codegen = _PyTreeCodeGen( + for real_idx, graph_idx in graph_input_order.items(): + flat_inputs[real_idx] = example_inputs[graph_idx] + + # Use FX transformer to rebuild the graph cleanly + transformed_graph = DynamoGraphTransformer( + graph, + flat_inputs, + flat_args_dynamic_dims, + graph_input_order, + graph_output_map, + fake_mode, + ).transform() + + # Set up PyTree codegen for proper input/output handling + transformed_graph.graph._codegen = _PyTreeCodeGen( _PyTreeInfo( - argument_names(signature, args, kwargs), # type: ignore[arg-type] + argument_names(inspect.signature(mod.forward), args, kwargs), # type: ignore[attr-defined, arg-type] in_spec, out_spec, ) ) + transformed_graph.recompile() + + clean_nn_module_stack(transformed_graph) + clean_export_root(transformed_graph) + + transformed_graph.meta["module_call_specs"] = module_call_spec + + constraint_violation_error = None + try: + # Check if we have any constraint violations + check_fn = out.dynamo_output.build_guards( + module_to_trace.forward.__code__ + ).guard_manager + check_fn.check(f_locals) + except ( + ConstraintViolationError, + torch.utils._sympy.value_ranges.ValueRangeError, + ) as e: + constraint_violation_error = e + + if ( + (shape_env := getattr(fake_mode, "shape_env", None)) is not None + and (dim_constraints := shape_env.dim_constraints) is not None + and not isinstance( + module_to_trace.forward, + (torch._ops.OpOverloadPacket, torch._ops.OpOverload), + ) + ): + dim_constraints.solve() + forced_specializations = dim_constraints.forced_specializations() + msg = dim_constraints.prettify_results( + inspect.signature(mod.forward), # type: ignore[attr-defined] + dynamic_shapes, + constraint_violation_error, + forced_specializations, + ) + if constraint_violation_error: + 
constraint_violation_error.args = ( + constraint_violation_error.args[0] + msg, + ) + else: + if forced_specializations: + constraint_violation_error = ConstraintViolationError(msg) + else: + log.info( + "Summary of dimension constraints:%s", + msg, + ) + + # Error if we have any constraints on static values + for k in shape_env.var_to_range.keys(): + if isinstance(k, sympy.Integer): + constraint_violation_error = ConstraintViolationError( + f"{''.join(traceback.format_list(shape_env.var_to_stack[k]))}\n" + "It appears that you're trying to set a constraint on a " + f"value which we evaluated to have a static value of {k}. " + 'Set TORCH_LOGS="+export" for more information.' + ) + if constraint_violation_error: + raise constraint_violation_error - graph.recompile() - return graph + return transformed_graph return inner diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index ee3c34618413e..996431e32ebc9 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -379,6 +379,10 @@ class ExportMetaData: out_spec: Union[torch.utils._pytree.TreeSpec, torch.utils._pytree.LeafSpec] = ( torch.utils._pytree._LEAF_SPEC ) + module_call_spec: dict[ + str, + dict[str, Union[torch.utils._pytree.TreeSpec, torch.utils._pytree.LeafSpec]], + ] = dc_field(default_factory=dict) def get_builtins_dict(global_scope: Scope) -> dict[str, Any]: @@ -1695,6 +1699,19 @@ def compile_subgraph( if isinstance( mut_type, (AttributeMutationExisting, ValueMutationExisting) ): + if isinstance(var, UserDefinedDictVariable) and isinstance( + var.value, _ExportModuleSpecTrackerDict + ): + for k, v in var.items.items(): + specs = {} + for k_spec, val in v.items.items(): + specs[k_spec.vt.as_python_constant()] = ( + val.as_python_constant() + ) + assert ["in_spec", "out_spec"] == list(specs.keys()) + self.export_metadata.module_call_spec[ + k.vt.as_python_constant() + ] = specs # export uses tracepoint pass to dump submodule inp/out spec # into global state, so we filter it here if not ( diff --git a/torch/export/__init__.py b/torch/export/__init__.py index 621cabf15a3b8..0fa59538aebe2 100644 --- a/torch/export/__init__.py +++ b/torch/export/__init__.py @@ -70,6 +70,7 @@ def export_for_training( strict: bool = False, preserve_module_call_signature: tuple[str, ...] = (), prefer_deferred_runtime_asserts_over_guards: bool = False, + _use_new_tracer_experimental: bool = False, ) -> ExportedProgram: """ :func:`export_for_training` takes any nn.Module along with example inputs, and produces a traced graph representing @@ -159,6 +160,7 @@ def export_for_training( strict=strict, preserve_module_call_signature=preserve_module_call_signature, prefer_deferred_runtime_asserts_over_guards=prefer_deferred_runtime_asserts_over_guards, + _use_new_tracer_experimental=_use_new_tracer_experimental, ) @@ -171,6 +173,7 @@ def export( strict: bool = False, preserve_module_call_signature: tuple[str, ...] 
= (), prefer_deferred_runtime_asserts_over_guards: bool = False, + _use_new_tracer_experimental: bool = False, ) -> ExportedProgram: """ :func:`export` takes any nn.Module along with example inputs, and produces a traced graph representing @@ -283,6 +286,7 @@ def export( preserve_module_call_signature=preserve_module_call_signature, pre_dispatch=True, prefer_deferred_runtime_asserts_over_guards=prefer_deferred_runtime_asserts_over_guards, + _use_new_tracer_experimental=_use_new_tracer_experimental, ) except Exception as e: draft_export_msg = ( diff --git a/torch/export/_trace.py b/torch/export/_trace.py index 76d80ff6eeec8..560fccde982c2 100644 --- a/torch/export/_trace.py +++ b/torch/export/_trace.py @@ -757,6 +757,7 @@ def _export_to_torch_ir( preserve_module_call_signature: tuple[str, ...] = (), disable_constraint_solver: bool = False, prefer_deferred_runtime_asserts_over_guards: bool = False, + _use_new_tracer_experimental: bool = False, restore_fqn: bool = True, _log_export_usage: bool = True, same_signature: bool = True, @@ -809,20 +810,31 @@ def _export_to_torch_ir( f, preserve_module_call_signature, module_call_specs ) with ctx, _ignore_backend_decomps(): - gm_torch_level, _ = torch._dynamo.export( - f, - dynamic_shapes=dynamic_shapes, # type: ignore[arg-type] - constraints=constraints, # type: ignore[arg-type] - assume_static_by_default=True, - tracing_mode="symbolic", - disable_constraint_solver=disable_constraint_solver, - prefer_deferred_runtime_asserts_over_guards=prefer_deferred_runtime_asserts_over_guards, - _log_export_usage=_log_export_usage, - same_signature=same_signature, - )( - *args, - **kwargs, - ) + if _use_new_tracer_experimental: + from torch._dynamo.functional_export import ( + _dynamo_graph_capture_for_export, + ) + + gm_torch_level = _dynamo_graph_capture_for_export( + f, constraints=constraints, dynamic_shapes=dynamic_shapes + )(*args, **kwargs) + + else: + gm_torch_level, _ = torch._dynamo.export( + f, + dynamic_shapes=dynamic_shapes, # type: ignore[arg-type] + constraints=constraints, # type: ignore[arg-type] + assume_static_by_default=True, + tracing_mode="symbolic", + disable_constraint_solver=disable_constraint_solver, + prefer_deferred_runtime_asserts_over_guards=prefer_deferred_runtime_asserts_over_guards, + _log_export_usage=_log_export_usage, + same_signature=same_signature, + )( + *args, + **kwargs, + ) + gm_torch_level.meta["module_call_specs"] = module_call_specs except (ConstraintViolationError, ValueRangeError) as e: raise UserError(UserErrorType.CONSTRAINT_VIOLATION, str(e)) # noqa: B904 except GuardOnDataDependentSymNode as e: @@ -832,8 +844,6 @@ def _export_to_torch_ir( case_name="constrain_as_size_example", ) - gm_torch_level.meta["module_call_specs"] = module_call_specs - if isinstance(f, torch.nn.Module) and restore_fqn: _restore_state_dict(f, gm_torch_level) @@ -1407,6 +1417,7 @@ def _strict_export( orig_in_spec: TreeSpec, prefer_deferred_runtime_asserts_over_guards: bool, _to_aten_func: Callable, + _use_new_tracer_experimental: bool = False, ) -> ExportArtifact: """ _to_aten_func can either be `_export_to_aten_ir_make_fx` or `_export_to_aten_ir` @@ -1421,6 +1432,7 @@ def _strict_export( restore_fqn=False, # don't need to restore because we will do it later prefer_deferred_runtime_asserts_over_guards=prefer_deferred_runtime_asserts_over_guards, _log_export_usage=False, + _use_new_tracer_experimental=_use_new_tracer_experimental, ) # We detect the fake_mode by looking at gm_torch_level's placeholders, this is the fake_mode created in 
dynamo. @@ -2041,6 +2053,7 @@ def _export_for_training( strict: bool = True, preserve_module_call_signature: tuple[str, ...] = (), prefer_deferred_runtime_asserts_over_guards: bool = False, + _use_new_tracer_experimental: bool = False, ) -> ExportedProgram: global _EXPORT_MODULE_HIERARCHY _EXPORT_MODULE_HIERARCHY = _get_module_hierarchy(mod) @@ -2056,7 +2069,13 @@ def _export_for_training( original_state_dict = _get_original_state_dict(mod) # Call the appropriate export function based on the strictness of tracing. - export_func = _strict_export if strict else _non_strict_export + export_func = ( + functools.partial( + _strict_export, _use_new_tracer_experimental=_use_new_tracer_experimental + ) + if strict + else _non_strict_export + ) alive_fake_input_ids_before_export: list[int] = [] @@ -2185,6 +2204,7 @@ def _export( preserve_module_call_signature: tuple[str, ...] = (), pre_dispatch: bool = False, prefer_deferred_runtime_asserts_over_guards: bool = False, + _use_new_tracer_experimental: bool = False, ) -> ExportedProgram: """ Traces either an nn.Module's forward function or just a callable with PyTorch @@ -2260,6 +2280,7 @@ def _export( strict=strict, preserve_module_call_signature=preserve_module_call_signature, prefer_deferred_runtime_asserts_over_guards=prefer_deferred_runtime_asserts_over_guards, + _use_new_tracer_experimental=_use_new_tracer_experimental, ) dtrace_structured("exported_program", payload_fn=lambda: str(ep)) return ep diff --git a/torch/export/_unlift.py b/torch/export/_unlift.py index f876e462214ca..af746c4e3b7dd 100644 --- a/torch/export/_unlift.py +++ b/torch/export/_unlift.py @@ -729,14 +729,35 @@ def _unlift_exported_program_lifted_states( graph = unlift_gm.graph placeholders = graph.find_nodes(op="placeholder") if check_guards and placeholders and ep.example_inputs: + sig = inspect.signature(unlift_gm.forward) input_paths = _get_input_paths( ep.example_inputs, - inspect.signature(unlift_gm.forward), + sig, ) + + # TODO (tmanlaibaatar) + # This is band-aid solution to export new tracer replacing + # shape env sources to flat_args. The real fix should be replacing + # shape env sources to original user sources but this is quite + # involved because you need to carefully construct new sources using + # dynamo and replace all instances of it inside shape env. But it is + # lot easier to manipulate after we turn them into strings and only + # time we use these guards is during retracing or running exported program, + # so it is probably ok to have "not useful" guards on ep for now. + name_mapping = {} + for idx, path in enumerate(input_paths): + name_mapping[f"L['flat_args'][{idx}]"] = f"L{pytree.keystr(path)}" + + ep_guards = [] + for guard in ep._guards_code: + for old_name, new_name in name_mapping.items(): + guard = guard.replace(old_name, new_name) + ep_guards.append(guard) + guards_code = _get_input_guards_for_graph( placeholders, ep.range_constraints, input_paths ) - guards_code.extend(ep._guards_code) + guards_code.extend(ep_guards) unlift_gm._guards_fn = _convert_guards_code_to_fn(guards_code, input_paths) root_nn_module_stack = torch.fx._utils.first_call_function_nn_module_stack( diff --git a/torch/export/exported_program.py b/torch/export/exported_program.py index 1aa2e59d1752b..807321f0a1eb7 100644 --- a/torch/export/exported_program.py +++ b/torch/export/exported_program.py @@ -1086,7 +1086,7 @@ def __init__( # Validate should be always the last step of the constructor. 
self.validate() - self._guards_code = _convert_guards_to_code(_get_shape_env(self._graph_module)) + self._guards_code = _convert_guards_to_code(self._graph_module) @property @compatibility(is_backward_compatible=False) @@ -1690,7 +1690,8 @@ def _create_graph_module_for_export(root, graph): return gm -def _convert_guards_to_code(shape_env): +def _convert_guards_to_code(graph_module): + shape_env = _get_shape_env(graph_module) if shape_env is None: return [] diff --git a/torch/fx/graph_module.py b/torch/fx/graph_module.py index 4c067c0e76e4c..e8f68d78dfcd0 100644 --- a/torch/fx/graph_module.py +++ b/torch/fx/graph_module.py @@ -302,6 +302,12 @@ def _has_attr(model: torch.nn.Module, attr_name: str): return hasattr(t, field) +def _set_attr(model: torch.nn.Module, attr_name: str, value): + attr_names = attr_name.split(".") + t = _get_attr_via_attr_list(model, attr_names[:-1]) + setattr(t, attr_names[-1], value) + + def _print_readable( module, module_name, From 94755e81c4047f951c460b7ce0b4e3c82a706d0c Mon Sep 17 00:00:00 2001 From: Colin Peppler Date: Mon, 8 Sep 2025 17:58:07 -0700 Subject: [PATCH 063/693] [inductor] Enable combo kernels with unbacked inputs (#162442) Internal user tried enabling combo kernels, but ran into "Cannot convert symbols to int". This PR is to enable combo kernels on inputs with data-dependent shapes. ### Example exception ``` File "/data/users/colinpeppler/pytorch/torch/_inductor/codegen/triton.py", line 4997, in benchmark_combo_kernel kernel_code_list = self.generate_combo_kernel_code( ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/data/users/colinpeppler/pytorch/torch/_inductor/codegen/simd.py", line 1849, in generate_combo_kernel_code src_code = kernel.codegen_kernel() ^^^^^^^^^^^^^^^^^^^^^^^ File "/data/users/colinpeppler/pytorch/torch/_inductor/codegen/triton_combo_kernel.py", line 802, in codegen_kernel code.splice(self.codegen_kernel_benchmark(num_gb=0)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/data/users/colinpeppler/pytorch/torch/_inductor/codegen/triton_combo_kernel.py", line 852, in codegen_kernel_benchmark var_names.extend(self.kernel_benchmark_extra_args()) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/data/users/colinpeppler/pytorch/torch/_inductor/codegen/triton_combo_kernel.py", line 733, in kernel_benchmark_extra_args extra_args.append(str(V.graph.sizevars.size_hint(tree.numel))) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/data/users/colinpeppler/pytorch/torch/_inductor/sizevars.py", line 584, in size_hint return int(out) ^^^^^^^^ File "/home/colinpeppler/.conda/envs/pytorch/lib/python3.12/site-packages/sympy/core/expr.py", line 307, in __int__ raise TypeError("Cannot convert symbols to int") torch._inductor.exc.InductorError: TypeError: Cannot convert symbols to int ``` Differential Revision: [D82042230](https://our.internmc.facebook.com/intern/diff/D82042230) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162442 Approved by: https://github.com/jansel --- test/inductor/test_unbacked_symints.py | 27 ++++++++++++++ torch/_inductor/codegen/simd.py | 9 +++-- torch/_inductor/codegen/triton.py | 34 +++++++++++++++--- .../_inductor/codegen/triton_combo_kernel.py | 36 ++++++++++++++++--- torch/_inductor/codegen/wrapper.py | 5 ++- 5 files changed, 99 insertions(+), 12 deletions(-) diff --git a/test/inductor/test_unbacked_symints.py b/test/inductor/test_unbacked_symints.py index cca1cb6a6dabb..cc9c1251523da 100644 --- a/test/inductor/test_unbacked_symints.py +++ b/test/inductor/test_unbacked_symints.py @@ -564,6 +564,33 @@ def fn(x): expected = 
fn(*example_inputs) torch.testing.assert_close(actual, expected) + @skipGPUIf(not HAS_GPU, "requires gpu and triton") + @dynamo_config.patch({"capture_dynamic_output_shape_ops": True}) + @inductor_config.patch({"combo_kernels": True, "benchmark_combo_kernel": True}) + def test_combo_kernel_size_hint_failure(self, device): + # A size hint failure is "TypeError: Cannot convert symbols to int" + if device == "cpu": + raise unittest.SkipTest("Combo kernels must be for GPU.") + + def fn(x): + nz = torch.nonzero(x) + u0 = nz.size(0) + t1 = torch.ones(u0, device=device) + t2 = torch.zeros(u0 + 1, device=device) + t3 = torch.zeros(u0 * 2, device=device) + t4 = torch.zeros(u0 - x.size(0), device=device) + out1 = t1 - 1 + out2 = t2 + 2 + out3 = t3 * 3 + out4 = t4 / 4 + return out1, out2, out3, out4 + + example_inputs = (torch.randn(32, device=device, dtype=torch.float16),) + torch._dynamo.mark_dynamic(example_inputs[0], 0) + actual = torch.compile(fn, fullgraph=True)(*example_inputs) + expected = fn(*example_inputs) + torch.testing.assert_close(actual, expected) + instantiate_device_type_tests(TestUnbackedSymints, globals(), allow_xpu=True) diff --git a/torch/_inductor/codegen/simd.py b/torch/_inductor/codegen/simd.py index d73db7ed2a227..6b1a8b5966d57 100644 --- a/torch/_inductor/codegen/simd.py +++ b/torch/_inductor/codegen/simd.py @@ -1010,7 +1010,10 @@ def estimate_kernel_num_bytes(self): # for the "cat". However, I think it might be a bit overwhelming that # we add such complexity only for handling some particular cases for # benchmarking. - out_numel = V.graph.sizevars.size_hint(sympy_product(self.numels.values())) + out_numel = V.graph.sizevars.size_hint( + sympy_product(self.numels.values()), + fallback=config.unbacked_symint_fallback, + ) for i, arg in enumerate(call_args): # "buf" may be narrowed. In this case, the number of memory accesses # should be estimated based on the reinterpreted layout. @@ -1021,7 +1024,9 @@ def estimate_kernel_num_bytes(self): nbytes.append(0) continue arg_numel = V.graph.get_numel(arg) - buf_size = V.graph.sizevars.size_hint(arg_numel) + buf_size = V.graph.sizevars.size_hint( + arg_numel, fallback=config.unbacked_symint_fallback + ) if buf_size > out_numel: # This arg points to a buf that has been sliced. 
# We need to count each individual slice to have diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index 175ea55ec3af2..eafd9d8c64df9 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -3868,9 +3868,15 @@ def kernel_benchmark_extra_args(self) -> list[str]: if isinstance(arg, int): args.append(str(arg)) elif isinstance(arg, SymbolicCallArg): - args.append(str(V.graph.sizevars.size_hint(arg.inner_expr))) + hint = V.graph.sizevars.size_hint( + arg.inner_expr, fallback=config.unbacked_symint_fallback + ) + args.append(str(hint)) elif isinstance(arg, sympy.Expr): - args.append(str(V.graph.sizevars.size_hint(arg))) + hint = V.graph.sizevars.size_hint( + arg, fallback=config.unbacked_symint_fallback + ) + args.append(str(hint)) else: raise ValueError(f"Unsupported numel argument type: {type(arg)}") return args @@ -3887,14 +3893,34 @@ def codegen_kernel_benchmark(self, num_gb): var_name = f"arg_{next(name_cnt)}" buf = V.graph.try_get_buffer(arg_name) if buf: + size = V.graph.sizevars.size_hints( + buf.get_size(), + hint_override=self.hint_override, + fallback=config.unbacked_symint_fallback, + ) + stride = V.graph.sizevars.size_hints( + buf.get_stride(), + hint_override=self.hint_override, + fallback=config.unbacked_symint_fallback, + ) result.writeline( - f"{var_name} = rand_strided({V.graph.sizevars.size_hints(buf.get_size(), hint_override=self.hint_override)}, {V.graph.sizevars.size_hints(buf.get_stride(), hint_override=self.hint_override)}, device='{buf.get_device()}', dtype={buf.get_dtype()})" # noqa: B950 line too long + f"{var_name} = rand_strided({size}, {stride}, device='{buf.get_device()}', dtype={buf.get_dtype()})" # noqa: B950 line too long ) elif arg_name in V.graph.constants: # note that random seed is put in V.graph.constants const_tensor = V.graph.constants[arg_name] + size = V.graph.sizevars.size_hints( + const_tensor.size(), + hint_override=self.hint_override, + fallback=config.unbacked_symint_fallback, + ) + stride = V.graph.sizevars.size_hints( + const_tensor.stride(), + hint_override=self.hint_override, + fallback=config.unbacked_symint_fallback, + ) result.writeline( - f"{var_name} = rand_strided({V.graph.sizevars.size_hints(const_tensor.size(), hint_override=self.hint_override)}, {V.graph.sizevars.size_hints(const_tensor.stride(), hint_override=self.hint_override)}, device='{const_tensor.device}', dtype={const_tensor.dtype})" # type: ignore[arg-type] # noqa: B950 line too long + f"{var_name} = rand_strided({size}, {stride}, device='{const_tensor.device}', dtype={const_tensor.dtype})" # type: ignore[arg-type] # noqa: B950 line too long ) elif isinstance(arg_sig, SizeArg): symval_hint = V.graph.sizevars.size_hint(arg_sig.expr) diff --git a/torch/_inductor/codegen/triton_combo_kernel.py b/torch/_inductor/codegen/triton_combo_kernel.py index dc2392119cc51..c9669660da1d1 100644 --- a/torch/_inductor/codegen/triton_combo_kernel.py +++ b/torch/_inductor/codegen/triton_combo_kernel.py @@ -90,7 +90,10 @@ def _default_custom_combo_kernel_horizontal_partition( long_reduction = [ n for n in reduction - if V.graph.sizevars.size_hint(n.group[-1][-1]) > 2048 # type: ignore[arg-type] + if ( + V.graph.sizevars.shape_env.has_hint(n.group[-1][-1]) + and V.graph.sizevars.size_hint(n.group[-1][-1]) > 2048 # type: ignore[arg-type] + ) ] short_reduction = [n for n in reduction if n not in long_reduction] if long_reduction: @@ -103,6 +106,7 @@ def _default_custom_combo_kernel_horizontal_partition( for n in not_reduction if not 
kernel_map[n].inside_reduction and len(kernel_map[n].numels) == 2 + and V.graph.sizevars.shape_env.has_hint(kernel_map[n].numels["x"]) and V.graph.sizevars.size_hint(kernel_map[n].numels["x"]) > LARGE_NUMELS ] if large_pointwise: @@ -485,7 +489,11 @@ def min_x_blocks_sub_kernel(self, sub_kernel: TritonKernel, num: int) -> None: def select_heuristics(self, sub_kernel: TritonKernel) -> tuple[str, dict[str, int]]: size_hints = { - prefix: next_power_of_2(V.graph.sizevars.size_hint(numel)) + prefix: next_power_of_2( + V.graph.sizevars.size_hint( + numel, fallback=config.unbacked_symint_fallback + ) + ) for prefix, numel in sub_kernel.numels.items() if not prefix_is_reduction(prefix) or sub_kernel.inside_reduction } @@ -726,7 +734,13 @@ def kernel_benchmark_extra_args(self) -> list[str]: if numel_name not in self.dynamic_shape_args: continue if not tree.is_reduction or sub_kernel.inside_reduction: - extra_args.append(str(V.graph.sizevars.size_hint(tree.numel))) + extra_args.append( + str( + V.graph.sizevars.size_hint( + tree.numel, fallback=config.unbacked_symint_fallback + ) + ) + ) return extra_args def codegen_kernel(self, name: Optional[str] = None) -> str: @@ -810,14 +824,26 @@ def codegen_kernel_benchmark(self, num_gb: float) -> IndentedBuffer: var_name = f"arg_{next(name_cnt)}" buf = V.graph.try_get_buffer(arg_name) if buf: + size = V.graph.sizevars.size_hints( + buf.get_size(), fallback=config.unbacked_symint_fallback + ) + stride = V.graph.sizevars.size_hints( + buf.get_stride(), fallback=config.unbacked_symint_fallback + ) result.writeline( - f"{var_name} = rand_strided({V.graph.sizevars.size_hints(buf.get_size())}, {V.graph.sizevars.size_hints(buf.get_stride())}, device='{buf.get_device()}', dtype={buf.get_dtype()})" # noqa: B950 line too long + f"{var_name} = rand_strided({size}, {stride}, device='{buf.get_device()}', dtype={buf.get_dtype()})" # noqa: B950 line too long ) elif arg_name in V.graph.constants: # note that random seed is put in V.graph.constants const_tensor = V.graph.constants[arg_name] + size = V.graph.sizevars.size_hints( + const_tensor.size(), fallback=config.unbacked_symint_fallback + ) + stride = V.graph.sizevars.size_hints( + const_tensor.stride(), fallback=config.unbacked_symint_fallback + ) result.writeline( - f"{var_name} = rand_strided({V.graph.sizevars.size_hints(const_tensor.size())}, {V.graph.sizevars.size_hints(const_tensor.stride())}, device='{const_tensor.device}', dtype={const_tensor.dtype})" # type: ignore[arg-type] # noqa: B950 line too long + f"{var_name} = rand_strided({size}, {stride}, device='{const_tensor.device}', dtype={const_tensor.dtype})" # type: ignore[arg-type] # noqa: B950 line too long ) elif isinstance(arg_sig, SizeArg): symval_hint = V.graph.sizevars.size_hint(arg_sig.expr) diff --git a/torch/_inductor/codegen/wrapper.py b/torch/_inductor/codegen/wrapper.py index 9d1b82d9b9334..53037195b0577 100644 --- a/torch/_inductor/codegen/wrapper.py +++ b/torch/_inductor/codegen/wrapper.py @@ -2385,7 +2385,10 @@ def generate_numel_expr(self, kernel_name: str, tree, suffix: Optional[str] = No # constant now, need type info. I agree, this needs type info, and while this is not true type info # it suffices as a type hint for the purposes of producing the correct code for this type. 
arg = SymbolicCallArg(sym, tree.numel) - self.writeline(SymbolicCallArgLine(self, arg, V.graph)) + + is_benchmark_kernel = kernel_name == "" + if not is_benchmark_kernel: + self.writeline(SymbolicCallArgLine(self, arg, V.graph)) return arg From 348303ebd2c196db47b8f83301f41015f7b52964 Mon Sep 17 00:00:00 2001 From: Colin Peppler Date: Wed, 10 Sep 2025 10:28:37 -0700 Subject: [PATCH 064/693] [ez] add docstring/typing for codegen_kernel_benchmark (#162609) ``` lintrunner init && lintrunner -m origin/main ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162609 Approved by: https://github.com/coconutruben ghstack dependencies: #162442 --- torch/_inductor/codegen/triton.py | 12 +++++++++++- torch/_inductor/codegen/triton_combo_kernel.py | 10 ++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index eafd9d8c64df9..cdf5b5d3fcb32 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -3881,7 +3881,17 @@ def kernel_benchmark_extra_args(self) -> list[str]: raise ValueError(f"Unsupported numel argument type: {type(arg)}") return args - def codegen_kernel_benchmark(self, num_gb): + def codegen_kernel_benchmark(self, num_gb: Optional[float]) -> IndentedBuffer: + """ + Generates Python code for benchmarking this Triton kernel. + - Creates example inputs (random tensors, constants, sizes). + - Runs the kernel on the current GPU/stream. + - Prints runtime (ms) and throughput (GB/s) using `num_gb`. + Args: + num_gb (float): The number of gigabytes to use for throughput calculation. + Returns: + IndentedBuffer: A buffer containing the generated Python benchmark code. + """ result = IndentedBuffer() _argdefs, call_args, signature, _ = self.args.python_argdefs() diff --git a/torch/_inductor/codegen/triton_combo_kernel.py b/torch/_inductor/codegen/triton_combo_kernel.py index c9669660da1d1..e3df5bc0363d2 100644 --- a/torch/_inductor/codegen/triton_combo_kernel.py +++ b/torch/_inductor/codegen/triton_combo_kernel.py @@ -814,6 +814,16 @@ def codegen_kernel(self, name: Optional[str] = None) -> str: return code.getvalue() def codegen_kernel_benchmark(self, num_gb: float) -> IndentedBuffer: + """ + Generates Python code for benchmarking this combo kernel. + - Creates example inputs (random tensors, constants, sizes). + - Runs the kernel on the current GPU/stream. + - Prints runtime (ms) and throughput (GB/s) using `num_gb`. + Args: + num_gb (float): The number of gigabytes to use for throughput calculation. + Returns: + IndentedBuffer: A buffer containing the generated Python benchmark code. + """ result = IndentedBuffer() _argdefs, call_args, signature, _ = self.args.python_argdefs() result.writelines(["", "", "def get_args():"]) From 40ea6e418a324ef8ca34e85176dec1a496621f11 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 10 Sep 2025 20:51:30 +0000 Subject: [PATCH 065/693] Revert "Fix decorators skipping NCCL tests (#158846)" This reverts commit c2388201fc85b0748173212de5a17514c7a71f21. 
Reverted https://github.com/pytorch/pytorch/pull/158846 on behalf of https://github.com/huydhn due to Sorry for reverting your change but it is failing some inductor tests ([comment](https://github.com/pytorch/pytorch/pull/158846#issuecomment-3276471387)) --- .../fsdp/test_fully_shard_logging.py | 6 +- test/distributed/test_functional_api.py | 25 ++++-- torch/testing/_internal/common_distributed.py | 89 +++++++++++++------ .../_shard/sharded_tensor/__init__.py | 9 +- .../distributed/_tensor/common_dtensor.py | 7 +- .../_internal/distributed/distributed_test.py | 18 ++-- 6 files changed, 103 insertions(+), 51 deletions(-) diff --git a/test/distributed/_composable/fsdp/test_fully_shard_logging.py b/test/distributed/_composable/fsdp/test_fully_shard_logging.py index 9b666eb55ba08..c9450a2b8f475 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_logging.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_logging.py @@ -1,7 +1,7 @@ # Owner(s): ["module: fsdp"] import functools import os -import unittest +import unittest.mock import torch.distributed as dist from torch._dynamo.test_case import run_tests @@ -37,9 +37,9 @@ def test_fsdp_logging(self): import torch.distributed as dist import torch.nn as nn from torch.distributed.fsdp import fully_shard -logger = logging.getLogger("torch.distributed.fsdp.fully_shard") +logger = logging.getLogger("torch.distributed._composable.fsdp") logger.setLevel(logging.DEBUG) -device = '{device_type.type}' +device = {device_type.type} torch.manual_seed(0) model = nn.Sequential(*[nn.Linear(4, 4, device=device, bias=False) for _ in range(2)]) for layer in model: diff --git a/test/distributed/test_functional_api.py b/test/distributed/test_functional_api.py index a21eb0dbf4447..b5522fe2bef06 100644 --- a/test/distributed/test_functional_api.py +++ b/test/distributed/test_functional_api.py @@ -13,7 +13,6 @@ from torch._inductor.utils import run_and_get_code from torch.testing import FileCheck from torch.testing._internal.common_device_type import instantiate_device_type_tests -from torch.testing._internal.common_distributed import exit_if_lt_x_accelerators from torch.testing._internal.inductor_utils import HAS_GPU @@ -25,7 +24,7 @@ DistributedTestBase, MultiThreadedTestCase, requires_accelerator_dist_backend, - skip_if_no_gpu, + TEST_SKIPS, ) from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, @@ -480,14 +479,25 @@ def allred_mesh_dim(input): BACKEND = dist.Backend.XCCL +# allows you to check for multiple accelerator irrespective of device type +# to add new device types to this check simply follow the same format +# and append an elif with the conditional and appropriate device count function for your new device +def exit_if_lt_x_accelerators(x): + if torch.accelerator.is_available(): + if torch.accelerator.device_count() < x: + sys.exit(TEST_SKIPS[f"multi-accelerator-{x}"].exit_code) + + def with_comms(func=None): if func is None: return partial(with_comms) @wraps(func) def wrapper(self, *args, **kwargs): - if BACKEND in (dist.Backend.NCCL, dist.Backend.XCCL): - exit_if_lt_x_accelerators(self.world_size) + if ( + BACKEND == dist.Backend.NCCL or BACKEND == dist.Backend.XCCL + ) and torch.accelerator.device_count() < self.world_size: + sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code) kwargs["device"] = DEVICE self.pg = self.create_pg(device=DEVICE) @@ -500,9 +510,9 @@ def wrapper(self, *args, **kwargs): class TestCollectivesWithDistributedBackend(DistributedTestBase): - @skip_if_no_gpu @with_comms() 
def test_all_gather_into_tensor_coalesced(self, device): + exit_if_lt_x_accelerators(self.world_size) tensors = [ torch.ones([4], device=device), torch.ones([4], device=device) + 1, @@ -574,8 +584,9 @@ def allreduce(t, pg): compiled_allreduce(torch.randn(8, device=device), self.pg) @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") - @skip_if_no_gpu def test_tracing_with_fakepg(self, device=DEVICE): + exit_if_lt_x_accelerators(self.world_size) + def allreduce(t, pg): return ft_c.all_reduce(t, "sum", pg) @@ -616,9 +627,9 @@ class TestDistributedBackendCollectivesWithWorldSize4( def world_size(self): return 4 - @skip_if_no_gpu @with_comms() def test_permute_tensor_with_sub_group(self, device): + exit_if_lt_x_accelerators(self.world_size) mesh_dim_names = ["dp", "tp"] mesh_2d = dt.init_device_mesh( diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py index d9d07dddea3d8..c1f75697fe889 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py @@ -118,26 +118,14 @@ def requires_ddp_rank(device): return device in DDP_RANK_DEVICES -def exit_if_lt_x_cuda_devs(x): - """Exit process unless at least the given number of CUDA devices are available""" - if torch.cuda.device_count() < x: - sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) - - -# allows you to check for multiple accelerator irrespective of device type -# to add new device types to this check simply follow the same format -# and append an elif with the conditional and appropriate device count function for your new device -def exit_if_lt_x_accelerators(x): - if torch.accelerator.device_count() < x: - sys.exit(TEST_SKIPS[f"multi-accelerator-{x}"].exit_code) - - def skip_if_no_gpu(func): """Skips if the world size exceeds the number of GPUs, ensuring that if the test is run, each rank has its own GPU via ``torch.cuda.device(rank)``.""" @wraps(func) def wrapper(*args, **kwargs): + if not (TEST_CUDA or TEST_HPU or TEST_XPU): + sys.exit(TEST_SKIPS["no_cuda"].exit_code) world_size = int(os.environ["WORLD_SIZE"]) if TEST_CUDA and torch.cuda.device_count() < world_size: sys.exit(TEST_SKIPS[f"multi-gpu-{world_size}"].exit_code) @@ -148,9 +136,7 @@ def wrapper(*args, **kwargs): return func(*args, **kwargs) - return unittest.skipUnless( - TEST_CUDA or TEST_HPU or TEST_XPU, TEST_SKIPS["no_cuda"].message - )(wrapper) + return wrapper # TODO (kwen2501): what is the purpose of this decorator? 
Tests with this @@ -182,16 +168,33 @@ def wrapper(*args, **kwargs): def require_n_gpus_for_nccl_backend(n, backend): - return skip_if_lt_x_gpu(n) if backend == "nccl" else unittest.skipIf(False, None) + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + if backend == "nccl" and torch.cuda.device_count() < n: + sys.exit(TEST_SKIPS[f"multi-gpu-{n}"].exit_code) + else: + return func(*args, **kwargs) + + return wrapper + + return decorator def import_transformers_or_skip(): - try: - from transformers import AutoModelForMaskedLM, BertConfig # noqa: F401 + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + from transformers import AutoModelForMaskedLM, BertConfig # noqa: F401 - return unittest.skipIf(False) - except ImportError: - return unittest.skip(TEST_SKIPS["importerror"].message) + return func(*args, **kwargs) + except ImportError: + sys.exit(TEST_SKIPS["importerror"].exit_code) + + return wrapper + + return decorator def at_least_x_gpu(x): @@ -205,7 +208,36 @@ def at_least_x_gpu(x): def skip_if_lt_x_gpu(x): - return unittest.skipUnless(at_least_x_gpu(x), TEST_SKIPS[f"multi-gpu-{x}"].message) + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + if torch.cuda.is_available() and torch.cuda.device_count() >= x: + return func(*args, **kwargs) + if TEST_HPU and torch.hpu.device_count() >= x: + return func(*args, **kwargs) + if TEST_XPU and torch.xpu.device_count() >= x: + return func(*args, **kwargs) + sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) + + return wrapper + + return decorator + + +# This decorator helps avoiding initializing cuda while testing other backends +def nccl_skip_if_lt_x_gpu(backend, x): + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + if backend != "nccl": + return func(*args, **kwargs) + if torch.cuda.is_available() and torch.cuda.device_count() >= x: + return func(*args, **kwargs) + sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code) + + return wrapper + + return decorator def verify_ddp_error_logged(model_DDP, err_substr): @@ -392,7 +424,14 @@ def requires_multicast_support(): def skip_if_rocm_multiprocess(func): """Skips a test for ROCm""" func.skip_if_rocm_multiprocess = True - return unittest.skipUnless(TEST_WITH_ROCM, TEST_SKIPS["skipIfRocm"].message)(func) + + @wraps(func) + def wrapper(*args, **kwargs): + if not TEST_WITH_ROCM: + return func(*args, **kwargs) + sys.exit(TEST_SKIPS["skipIfRocm"].exit_code) + + return wrapper def skip_if_win32(): diff --git a/torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py b/torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py index a0a38837c14b2..60c744ac1a84c 100644 --- a/torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py +++ b/torch/testing/_internal/distributed/_shard/sharded_tensor/__init__.py @@ -7,9 +7,8 @@ import torch.distributed as dist from torch.distributed import rpc from torch.testing._internal.common_distributed import ( - exit_if_lt_x_cuda_devs, MultiProcessTestCase, - require_n_gpus_for_nccl_backend, + TEST_SKIPS, tp_transports, ) @@ -95,10 +94,10 @@ def with_comms(func=None, init_rpc=True, backend="nccl"): @wraps(func) def wrapper(self, *args, **kwargs): - if backend == "nccl": - exit_if_lt_x_cuda_devs(self.world_size) + if backend == "nccl" and torch.cuda.device_count() < self.world_size: + sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code) self.init_comms(init_rpc=init_rpc, backend=backend) func(self, *args, **kwargs) self.destroy_comms(destroy_rpc=init_rpc) - 
return require_n_gpus_for_nccl_backend(1, backend)(wrapper) + return wrapper diff --git a/torch/testing/_internal/distributed/_tensor/common_dtensor.py b/torch/testing/_internal/distributed/_tensor/common_dtensor.py index 9758fa5d1e7d3..e25e08fbf5090 100644 --- a/torch/testing/_internal/distributed/_tensor/common_dtensor.py +++ b/torch/testing/_internal/distributed/_tensor/common_dtensor.py @@ -3,6 +3,7 @@ # Copyright (c) Meta Platforms, Inc. and affiliates import itertools +import sys from collections.abc import Iterator, Sequence from dataclasses import dataclass from functools import partial, wraps @@ -30,12 +31,12 @@ SequenceParallel, ) from torch.testing._internal.common_distributed import ( - exit_if_lt_x_cuda_devs, MultiProcContinuousTest, MultiProcessTestCase, MultiThreadedTestCase, run_subtests, skip_if_lt_x_gpu, + TEST_SKIPS, ) from torch.testing._internal.common_utils import TEST_CUDA, TEST_HPU, TEST_XPU from torch.utils._pytree import tree_flatten, tree_unflatten, TreeSpec @@ -373,8 +374,8 @@ def build_device_mesh(self) -> DeviceMesh: return init_device_mesh(self.device_type, (self.world_size,)) def init_pg(self, eager_init, backend: Optional[str] = None) -> None: - if "nccl" in self.backend: - exit_if_lt_x_cuda_devs(self.world_size) + if "nccl" in self.backend and torch.cuda.device_count() < self.world_size: + sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code) if backend is None: backend = self.backend diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py index 21d51b66ad03a..024fd47285ae8 100644 --- a/torch/testing/_internal/distributed/distributed_test.py +++ b/torch/testing/_internal/distributed/distributed_test.py @@ -59,10 +59,10 @@ captured_output, cleanup_temp_dir, DistTestCases, - exit_if_lt_x_cuda_devs, init_multigpu_helper, initialize_temp_directories, MultiProcessTestCase, + nccl_skip_if_lt_x_gpu, require_n_gpus_for_nccl_backend, requires_nccl_version, simple_sparse_reduce_tests, @@ -609,8 +609,10 @@ def _run(cls, rank, test_name, file_name, pipe, **kwargs): self.rank = rank self.file_name = file_name - if torch.cuda.is_available(): - exit_if_lt_x_cuda_devs(int(self.world_size)) + if torch.cuda.is_available() and torch.cuda.device_count() < int( + self.world_size + ): + sys.exit(TEST_SKIPS[f"multi-gpu-{self.world_size}"].exit_code) try: pg_timeout_seconds = CUSTOM_PG_TIMEOUT.get(test_name, default_pg_timeout) timeout = timedelta(seconds=pg_timeout_seconds) @@ -5342,7 +5344,7 @@ def step_model(model, input, target): BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @require_n_gpus_for_nccl_backend(2, BACKEND) + @nccl_skip_if_lt_x_gpu(BACKEND, 2) def test_accumulate_gradients_no_sync(self): """ Runs _test_accumulate_gradients_no_sync using default inputs @@ -5353,7 +5355,7 @@ def test_accumulate_gradients_no_sync(self): BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @require_n_gpus_for_nccl_backend(2, BACKEND) + @nccl_skip_if_lt_x_gpu(BACKEND, 2) def test_accumulate_gradients_no_sync_grad_is_view(self): """ Runs _test_accumulate_gradients_no_sync using default inputs @@ -5364,7 +5366,7 @@ def test_accumulate_gradients_no_sync_grad_is_view(self): BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @require_n_gpus_for_nccl_backend(2, BACKEND) + @nccl_skip_if_lt_x_gpu(BACKEND, 2) 
def test_accumulate_gradients_no_sync_allreduce_hook(self): """ Runs multiple iterations on _test_accumulate_gradients_no_sync @@ -5392,7 +5394,7 @@ def allreduce_hook( BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @require_n_gpus_for_nccl_backend(2, BACKEND) + @nccl_skip_if_lt_x_gpu(BACKEND, 2) def test_accumulate_gradients_no_sync_allreduce_with_then_hook(self): """ Runs multiple iterations on _test_accumulate_gradients_no_sync using allreduce @@ -5426,7 +5428,7 @@ def div(fut): BACKEND != "mpi" and BACKEND != "nccl" and BACKEND != "gloo", "get_future is only supported on mpi, nccl and gloo", ) - @require_n_gpus_for_nccl_backend(2, BACKEND) + @nccl_skip_if_lt_x_gpu(BACKEND, 2) def test_get_future(self): def mult(fut): return [t * 3 for t in fut.wait()] From 0663bdb12383b9717af49d58aed9d88de0dd0ecc Mon Sep 17 00:00:00 2001 From: atalman Date: Wed, 10 Sep 2025 20:58:41 +0000 Subject: [PATCH 066/693] Move inductor jobs 3.9->3.10 (#162323) Related to: https://github.com/pytorch/pytorch/issues/161167 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162323 Approved by: https://github.com/huydhn, https://github.com/Skylion007 --- .ci/docker/build.sh | 3 +-- .github/workflows/inductor-nightly.yml | 4 ++-- .github/workflows/inductor-perf-test-nightly-x86-zen.yml | 6 +++--- .github/workflows/inductor-perf-test-nightly-x86.yml | 6 +++--- .github/workflows/inductor-periodic.yml | 4 ++-- .github/workflows/inductor-unittest.yml | 4 ++-- .github/workflows/inductor.yml | 4 ++-- .github/workflows/operator_benchmark.yml | 6 +++--- .github/workflows/trunk.yml | 4 ++-- .../cpu_inductor_amp_freezing_torchbench_inference.csv | 4 ++-- .../cpu_inductor_freezing_torchbench_inference.csv | 4 ++-- .../cpu_inductor_torchbench_inference.csv | 4 ++-- .../dynamic_cpu_inductor_torchbench_inference.csv | 4 ++-- ..._autotune_inductor_amp_freezing_torchbench_inference.csv | 4 ++-- .../rocm/aot_eager_torchbench_inference.csv | 4 ++-- .../rocm/dynamic_aot_eager_torchbench_inference.csv | 4 ++-- .../rocm/dynamic_inductor_torchbench_inference.csv | 4 ++-- .../rocm/dynamo_eager_torchbench_inference.csv | 4 ++-- .../rocm/inductor_torchbench_inference.csv | 4 ++-- 19 files changed, 40 insertions(+), 41 deletions(-) diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 89967cef96b12..be85fdcb542d0 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -214,8 +214,7 @@ case "$tag" in TRITON=yes ;; pytorch-linux-jammy-py3-gcc11-inductor-benchmarks) - # TODO (huydhn): Upgrade this to Python >= 3.10 - ANACONDA_PYTHON_VERSION=3.9 + ANACONDA_PYTHON_VERSION=3.10 GCC_VERSION=11 VISION=yes KATEX=yes diff --git a/.github/workflows/inductor-nightly.yml b/.github/workflows/inductor-nightly.yml index fe0f102406b6a..78602e05586b7 100644 --- a/.github/workflows/inductor-nightly.yml +++ b/.github/workflows/inductor-nightly.yml @@ -37,7 +37,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" test-matrix: | @@ -56,7 +56,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: nightly-dynamo-benchmarks-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ 
needs.nightly-dynamo-benchmarks-build.outputs.docker-image }} test-matrix: ${{ needs.nightly-dynamo-benchmarks-build.outputs.test-matrix }} timeout-minutes: 720 diff --git a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml index 170de752ab875..a9a839df61af2 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml @@ -75,7 +75,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -101,7 +101,7 @@ jobs: needs: inductor-build if: github.event.schedule == '0 7 * * *' with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} @@ -118,7 +118,7 @@ jobs: needs: inductor-build if: github.event_name == 'workflow_dispatch' with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} diff --git a/.github/workflows/inductor-perf-test-nightly-x86.yml b/.github/workflows/inductor-perf-test-nightly-x86.yml index f894b8fdc6e03..0533184df2e0e 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86.yml @@ -80,7 +80,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -107,7 +107,7 @@ jobs: needs: inductor-build if: github.event.schedule == '0 7 * * *' with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} @@ -124,7 +124,7 @@ jobs: needs: inductor-build if: github.event_name == 'workflow_dispatch' with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-freezing-${{ inputs.freezing }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml index 21d965eaeaada..e2395087326a2 100644 --- a/.github/workflows/inductor-periodic.yml 
+++ b/.github/workflows/inductor-periodic.yml @@ -154,7 +154,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" test-matrix: | @@ -200,7 +200,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: periodic-dynamo-benchmarks-cpu-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/inductor-unittest.yml b/.github/workflows/inductor-unittest.yml index 2125a8559363b..6ab276a57fc4d 100644 --- a/.github/workflows/inductor-unittest.yml +++ b/.github/workflows/inductor-unittest.yml @@ -110,7 +110,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" test-matrix: | @@ -127,7 +127,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: inductor-cpu-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 4189d24a7b14f..2616141c0dc2a 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -79,7 +79,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" test-matrix: | @@ -101,7 +101,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: inductor-cpu-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/operator_benchmark.yml b/.github/workflows/operator_benchmark.yml index aaf32c160f0dc..dd262d31b8fc2 100644 --- a/.github/workflows/operator_benchmark.yml +++ b/.github/workflows/operator_benchmark.yml @@ -29,7 +29,7 @@ jobs: name: opbenchmark-build uses: ./.github/workflows/_linux-build.yml with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -42,7 +42,7 @@ jobs: name: opbenchmark-on-demand-build uses: ./.github/workflows/_linux-build.yml with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -55,7 +55,7 @@ jobs: uses: 
./.github/workflows/_linux-test.yml needs: opbenchmark-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.opbenchmark-build.outputs.docker-image }} test-matrix: ${{ needs.opbenchmark-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 4dd465d70803d..5b1a12812003f 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -240,7 +240,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.9-gcc11 + build-environment: linux-jammy-py3.10-gcc11 docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -255,7 +255,7 @@ jobs: - verify-cachebench-cpu-build - target-determination with: - build-environment: linux-jammy-py3.9-gcc11 + build-environment: linux-jammy-py3.10-gcc11 docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv index e68aa2fa5351f..a4dbaeb7b546d 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv index aec659fdcd654..885029ba8c56e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv index 4f2eec1493520..aa7a3161afcc6 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv index c8db4d5823203..f26dea6f692ef 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv @@ -82,11 +82,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv 
b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv index f4c9ffddd9974..39149853947c3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv index 6f316b219bb92..bf70642a855ef 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv index 4b5138ce9c367..e019365ccbfdb 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv index a3fc7cf192371..fed8ebded682c 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv index 6f316b219bb92..bf70642a855ef 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv index 8ccf95da9659e..014e23e41cb31 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 From 
35d7b321597ed00245aad533a8fa6b7fdadd73ea Mon Sep 17 00:00:00 2001
From: Gabriel Ferns
Date: Wed, 10 Sep 2025 21:19:09 +0000
Subject: [PATCH 067/693] Improve device info with new flops and bandwidth formula based on hardware libraries (#162245)

Previously, DeviceInfo provided theoretical hardware information based on a hardcoded list manually created from various datasheets.

This update:
- Attempts to gather the information from a hardware library like `pynvml`, improving accuracy and expanding support to devices that don't have entries in the datasheet list.
- Adjusts the flops and bw calculation based on these hardware values. For example, if the memory or SMs are underclocked, it adjusts the theoretical max flops/bw accordingly.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162245
Approved by: https://github.com/v0i0, https://github.com/shunting314
---
 test/inductor/test_analysis.py               |   3 -
 test/inductor/test_device_info.py            | 697 +++++++++++++++++++
 torch/_inductor/analysis/device_info.py      | 389 ++++++++++-
 torch/_inductor/analysis/profile_analysis.py |   4 +-
 torch/_inductor/utils.py                     |   6 +-
 5 files changed, 1075 insertions(+), 24 deletions(-)
 create mode 100644 test/inductor/test_device_info.py

diff --git a/test/inductor/test_analysis.py b/test/inductor/test_analysis.py
index 55f5bec86c539..6e458b1d73b45 100644
--- a/test/inductor/test_analysis.py
+++ b/test/inductor/test_analysis.py
@@ -289,14 +289,12 @@ def test_diff(self, device, dtype):
         om = _test_model(device, dtype)
         REPEAT = 5
         trace1, trace2 = trace_files()
-        print(f"first trace {trace1}")
         torch._dynamo.reset()  # reset the cache
         with fresh_inductor_cache():
             with torch.profiler.profile(record_shapes=True) as p:
                 om()
             p.export_chrome_trace(trace1)
 
-        print(f"second trace {trace2}")
         torch._dynamo.reset()  # reset the cache
         with fresh_inductor_cache():
             with torch.profiler.profile(record_shapes=True) as p:
@@ -304,7 +302,6 @@ def test_diff(self, device, dtype):
                 om()
             p.export_chrome_trace(trace2)
 
-        print("diffing...")
         with patch(
             "sys.argv",
             [
diff --git a/test/inductor/test_device_info.py b/test/inductor/test_device_info.py
new file mode 100644
index 0000000000000..21db7e0984c8b
--- /dev/null
+++ b/test/inductor/test_device_info.py
@@ -0,0 +1,697 @@
+# Owner(s): ["module: inductor"]
+
+import unittest
+from unittest.mock import MagicMock, patch
+
+import torch
+from torch._inductor.analysis.device_info import (
+    _get_amd_smi,
+    _get_pynvml,
+    datasheet_tops,
+    DeviceInfo,
+    DeviceSpec,
+    lookup_device_info,
+)
+from torch.testing._internal.common_utils import run_tests, TestCase
+
+
+class TestDeviceInfo(TestCase):
+    def test_lookup_device_info(self):
+        h100_info = lookup_device_info("NVIDIA H100")
+        self.assertIsNotNone(h100_info)
+        if h100_info is not None:
+            self.assertEqual(h100_info.dram_gb, 80)
+            self.assertIn(torch.float32, h100_info.tops)
+
+        unknown_info = lookup_device_info("Unknown Device")
+        self.assertIsNone(unknown_info)
+
+    def test_datasheet_tops_function(self):
+        with (
+            patch("torch.cuda.get_device_name") as mock_get_device_name,
+            patch("torch.cuda.is_available", return_value=True),
+        ):
+            mock_get_device_name.return_value = "NVIDIA H100"
+            tops = datasheet_tops(torch.float32)
+            self.assertIsNotNone(tops)
+            self.assertEqual(tops, 67.0)
+
+            tops_tf32 = datasheet_tops(torch.float32, is_tf32=True)
+            self.assertEqual(tops_tf32, 989.0)
+
+            mock_get_device_name.return_value = "Unknown Device"
+            tops_unknown = datasheet_tops(torch.float32)
+            self.assertIsNone(tops_unknown)
+
+            mock_get_device_name.return_value
= None + tops_no_device = datasheet_tops(torch.float32) + self.assertIsNone(tops_no_device) + + @unittest.skipIf(torch.version.hip, "only nvidia") + def test_lazy_pynvml_import(self): + """Test pynvml import through torch.cuda.""" + with ( + patch("torch.cuda._HAS_PYNVML", True), + patch.object(torch.cuda, "pynvml", MagicMock(), create=True) as mock_pynvml, + ): + pynvml = _get_pynvml() + self.assertEqual(pynvml, mock_pynvml) + + with patch("torch.cuda._HAS_PYNVML", False): + pynvml = _get_pynvml() + self.assertIsNone(pynvml) + + @patch("torch.version.hip", None) + @patch("torch._inductor.analysis.device_info._get_pynvml") + def test_hardware_lookup_clock_hz_success(self, mock_get_pynvml): + mock_pynvml = MagicMock() + mock_pynvml.nvmlInit = MagicMock() + mock_pynvml.nvmlDeviceGetHandleByIndex.return_value = "mock_handle" + mock_pynvml.nvmlDeviceGetMaxClockInfo.return_value = 1500 + mock_pynvml.NVML_CLOCK_SM = "clock_key" + mock_pynvml.nvmlShutdown = MagicMock() + mock_get_pynvml.return_value = mock_pynvml + + result = DeviceInfo._hardware_lookup_clock_hz() + self.assertEqual(result, 1500 * 1e6) + + @unittest.skipIf(torch.version.hip, "only nvidia") + def test_lazy_pynvml_import_caching(self): + """Test pynvml caching through torch.cuda (now handled by torch.cuda module).""" + with ( + patch("torch.cuda._HAS_PYNVML", True), + patch.object(torch.cuda, "pynvml", MagicMock(), create=True) as mock_pynvml, + ): + pynvml1 = _get_pynvml() + self.assertEqual(pynvml1, mock_pynvml) + + pynvml2 = _get_pynvml() + self.assertEqual(pynvml2, mock_pynvml) + + self.assertEqual(pynvml1, pynvml2) + + def test_hardware_lookup_exception_handling(self): + with ( + patch("torch.version.hip", None), + patch( + "torch.cuda.get_device_properties", side_effect=Exception("CUDA Error") + ), + patch( + "torch._inductor.analysis.device_info._get_pynvml" + ) as mock_get_pynvml, + ): + mock_pynvml = MagicMock() + mock_pynvml.nvmlInit.side_effect = Exception("NVML Error") + mock_get_pynvml.return_value = mock_pynvml + + # Test direct hardware lookup methods, not the generic lookup methods + result = DeviceInfo._hardware_lookup_sm_count() + self.assertIsNone(result) + + result = DeviceInfo._hardware_lookup_clock_hz() + self.assertIsNone(result) + + def test_device_mapping_aliases(self): + mi300x_direct = lookup_device_info("AMD MI300X") + mi300x_alias = lookup_device_info("AMD INSTINCT MI300X") + self.assertEqual(mi300x_direct, mi300x_alias) + + mi210x_direct = lookup_device_info("AMD MI210X") + mi210x_alias = lookup_device_info("AMD INSTINCT MI210X") + self.assertEqual(mi210x_direct, mi210x_alias) + + def test_lazy_amd_smi_import_success(self): + """Test AMD SMI import through torch.cuda.""" + with patch("torch.cuda._HAS_PYNVML", False): + amd_smi = _get_amd_smi() + self.assertIsNone(amd_smi) + + def test_lazy_amd_smi_import_caching(self): + """Test AMD SMI caching through torch.cuda.""" + # Test consistent behavior across multiple calls + with patch("torch.cuda._HAS_PYNVML", True): + amd_smi1 = _get_amd_smi() + amd_smi2 = _get_amd_smi() + # Both should return the same result (None in this environment) + self.assertEqual(amd_smi1, amd_smi2) + + with patch("torch.cuda._HAS_PYNVML", False): + amd_smi1 = _get_amd_smi() + amd_smi2 = _get_amd_smi() + self.assertEqual(amd_smi1, amd_smi2) + self.assertIsNone(amd_smi1) + + def test_amd_device_mapping_entries(self): + """Test that AMD devices are properly represented in device mapping.""" + mi300x = lookup_device_info("AMD MI300X") + self.assertIsNotNone(mi300x) + if mi300x is not 
None: + self.assertEqual(mi300x.dram_gb, 192.0) + self.assertEqual(mi300x.dram_bw_gbs, 5300.0) + self.assertIn(torch.float32, mi300x.tops) + + mi300x_instinct = lookup_device_info("AMD INSTINCT MI300X") + self.assertEqual(mi300x, mi300x_instinct) + + mi300a = lookup_device_info("AMD MI300A") + self.assertIsNotNone(mi300a) + if mi300a is not None: + self.assertEqual(mi300a.dram_gb, 128.0) + self.assertEqual(mi300a.dram_bw_gbs, 5300.0) + + mi210x = lookup_device_info("AMD MI210X") + self.assertIsNotNone(mi210x) + if mi210x is not None: + self.assertEqual(mi210x.dram_gb, 64.0) + self.assertEqual(mi210x.dram_bw_gbs, 1600.0) + + mi210x_instinct = lookup_device_info("AMD INSTINCT MI210X") + self.assertEqual(mi210x, mi210x_instinct) + + def test_amd_integration_with_datasheet_tops(self): + """Test datasheet_tops function with AMD devices.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + ): + mock_get_device_name.return_value = "AMD MI300X" + + tops_fp32 = datasheet_tops(torch.float32) + self.assertEqual(tops_fp32, 163.4) + + tops_fp16 = datasheet_tops(torch.float16) + self.assertEqual(tops_fp16, 1307.4) + + tops_bf16 = datasheet_tops(torch.bfloat16) + self.assertEqual(tops_bf16, 1307.4) + + tops_tf32 = datasheet_tops(torch.float32, is_tf32=True) + self.assertEqual(tops_tf32, 653.7) + + def test_flops_hardware_calculation(self): + """Test FLOPS calculation now uses datasheet values with clock adjustment.""" + with ( + patch.object(DeviceInfo, "lookup_clock_hz", return_value=1.5e9), + patch("torch.cuda.is_available", return_value=True), + patch("torch.cuda.get_device_name", return_value="AMD MI300X"), + ): + flops = DeviceInfo.lookup_tops( + device_name="AMD MI300X", dtype=torch.float32 + ) + # Now uses datasheet value (163.4 TOPS) with clock adjustment + # Device mapping has clock_hz=2100*1e6, so ratio = 1.5e9 / (2100*1e6) = ~0.714 + datasheet_flops = 163.4 * 1e12 + device_info = lookup_device_info("AMD MI300X") + if device_info and device_info.clock_hz: + clock_ratio = 1.5e9 / device_info.clock_hz + expected_flops = datasheet_flops * clock_ratio + else: + expected_flops = datasheet_flops + self.assertEqual(flops, expected_flops) + + def test_flops_datasheet_calculation(self): + """Test FLOPS calculation using datasheet TOPS.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + patch.object( + DeviceInfo, "lookup_clock_hz", return_value=1.98e9 / 2 + ), # Use datasheet clock + ): + mock_get_device_name.return_value = "NVIDIA H100" + + flops = DeviceInfo.lookup_tops( + device_name="NVIDIA H100", dtype=torch.float32 + ) + expected_flops = 67.0 * 1e12 / 2 + self.assertEqual(flops, expected_flops) + + def test_flops_fallback_to_datasheet(self): + """Test FLOPS fallback to datasheet when hardware lookup fails.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + patch.object( + DeviceInfo, "lookup_clock_hz", return_value=1.98e9 / 2 + ), # Use datasheet clock + ): + mock_get_device_name.return_value = "NVIDIA H100" + + flops = DeviceInfo.lookup_tops( + device_name="NVIDIA H100", dtype=torch.float32 + ) + expected_flops = 67.0 * 1e12 / 2 + self.assertEqual(flops, expected_flops) + + def test_flops_clock_adjustment_in_fallback(self): + """Test clock adjustment when falling back to datasheet.""" + custom_device_info = DeviceSpec( + memory_clock_hz=100, + 
tops={torch.float32: 100.0}, + dram_bw_gbs=1000.0, + dram_gb=16.0, + sm_count=None, + clock_hz=1.5e9, + ) + + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + patch( + "torch._inductor.analysis.device_info.lookup_device_info" + ) as mock_lookup, + ): + mock_get_device_name.return_value = "Custom Device" + mock_lookup.return_value = custom_device_info + + with patch.object( + DeviceInfo, "_hardware_lookup_clock_hz", return_value=3.0e9 + ): + flops = DeviceInfo.lookup_tops("Custom Device", dtype=torch.float32) + + datasheet_flops = 100.0 * 1e12 + clock_ratio = 3.0e9 / 1.5e9 + expected_flops = datasheet_flops * clock_ratio + self.assertEqual(flops, expected_flops) + + @patch("torch._inductor.analysis.device_info.lookup_device_info") + def test_flops_clock_adjustment_no_expected_clock(self, mock_lookup): + """Test fallback behavior when device mapping has None for clock_hz.""" + device_info = DeviceSpec( + memory_clock_hz=100, + tops={torch.float32: 100.0}, + dram_bw_gbs=1000.0, + dram_gb=16.0, + sm_count=None, + clock_hz=None, + ) + mock_lookup.return_value = device_info + + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + ): + mock_get_device_name.return_value = "NVIDIA H100" + + with patch.object( + DeviceInfo, "_hardware_lookup_clock_hz", return_value=3.0e9 + ): + flops = DeviceInfo.lookup_tops("NVIDIA H100", dtype=torch.float32) + + expected_flops = 100.0 * 1e12 + self.assertEqual(flops, expected_flops) + + def test_flops_clock_adjustment_none_clock(self): + """Test fallback behavior when clock lookup returns None.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + ): + mock_get_device_name.return_value = "NVIDIA H100" + + with patch.object( + DeviceInfo, "_hardware_lookup_clock_hz", return_value=None + ): + flops = DeviceInfo.lookup_tops("NVIDIA H100", dtype=torch.float32) + + expected_flops = 67.0 * 1e12 + self.assertEqual(flops, expected_flops) + + def test_flops_no_device_name(self): + """Test FLOPS calculation when device name is unavailable.""" + with ( + patch("torch.cuda.get_device_name", return_value=None), + patch("torch.cuda.is_available", return_value=False), + ): + # When there's no device name and we force datasheet, it should return None + with patch( + "torch._inductor.analysis.device_info.datasheet_tops", return_value=None + ): + flops = DeviceInfo.lookup_tops("NVIDIA H100", dtype=torch.float32) + self.assertIsNone(flops) + + # When cuda is not available, hardware lookup is skipped and datasheet is used + flops = DeviceInfo.lookup_tops("NVIDIA H100", dtype=torch.float32) + self.assertIsNone( + flops + ) # Should be None since cuda.is_available() is False + + def test_flops_unknown_device(self): + """Test FLOPS calculation with unknown device.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + ): + mock_get_device_name.return_value = "Unknown Device" + + flops = DeviceInfo.lookup_tops("Unknown Device", dtype=torch.float32) + # Should be None for unknown device + self.assertIsNone(flops) + + def test_flops_partial_hardware_values(self): + """Test FLOPS calculation with some hardware values missing.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + patch.object( + DeviceInfo, 
"lookup_clock_hz", return_value=1.98e9 / 2 + ), # Use datasheet clock + ): + mock_get_device_name.return_value = "NVIDIA H100" + + flops = DeviceInfo.lookup_tops( + device_name="NVIDIA H100", dtype=torch.float32 + ) + expected_flops = 67.0 * 1e12 / 2 + self.assertEqual(flops, expected_flops) + + def test_flops_exception_handling(self): + """Test FLOPS calculation handles exceptions gracefully.""" + with ( + patch.object( + DeviceInfo, + "_hardware_lookup_sm_count", + side_effect=Exception("Hardware error"), + ), + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + patch.object( + DeviceInfo, "lookup_clock_hz", return_value=1.98e9 / 2 + ), # Use datasheet clock + ): + mock_get_device_name.return_value = "NVIDIA H100" + + flops = DeviceInfo.lookup_tops("NVIDIA H100", dtype=torch.float32) + expected_flops = 67.0 * 1e12 / 2 + self.assertEqual(flops, expected_flops) + + def test_flops_integration_with_hardware_lookup(self): + """Test FLOPS integration with datasheet values and clock adjustment.""" + dn = "NVIDIA H100" + + with ( + patch.object(DeviceInfo, "lookup_clock_hz", return_value=1500 * 1e6), + patch("torch.cuda.is_available", return_value=True), + patch("torch.cuda.get_device_name", return_value=dn), + ): + flops = DeviceInfo.lookup_tops(device_name=dn, dtype=torch.float32) + # Now uses datasheet value (67.0 TOPS) with clock adjustment + # Device mapping has clock_hz=1.98e9, so ratio = 1500*1e6 / 1.98e9 = ~0.7576 + datasheet_flops = 67.0 * 1e12 + device_info = lookup_device_info(dn) + if device_info and device_info.clock_hz: + clock_ratio = (1500 * 1e6) / device_info.clock_hz + expected_flops = datasheet_flops * clock_ratio + else: + expected_flops = datasheet_flops + self.assertEqual(flops, expected_flops) + + @unittest.skipIf( + True, "pynvml and amdsmi are not available in CI, run these tests locally" + ) + @unittest.skipIf(torch.version.hip, "only nvidia") + def test_pynvml_integration(self): + """Test direct pynvml library integration.""" + try: + import pynvml + + # Test basic NVML initialization and device access + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + + # Test clock frequency retrieval + sm_clock_mhz = pynvml.nvmlDeviceGetMaxClockInfo( + handle, pynvml.NVML_CLOCK_SM + ) + self.assertIsInstance(sm_clock_mhz, int) + self.assertGreater(sm_clock_mhz, 0) + + # Test memory clock frequency retrieval + mem_clock_mhz = pynvml.nvmlDeviceGetMaxClockInfo( + handle, pynvml.NVML_CLOCK_MEM + ) + self.assertIsInstance(mem_clock_mhz, int) + self.assertGreater(mem_clock_mhz, 0) + + # Test memory bus width retrieval + bus_width_bits = pynvml.nvmlDeviceGetMemoryBusWidth(handle) + self.assertIsInstance(bus_width_bits, int) + self.assertGreater(bus_width_bits, 0) + + # Test bandwidth calculation (same as device_info.py implementation) + mem_clock_hz = mem_clock_mhz * 1e6 + effective_rate = mem_clock_hz * 2 # GDDR uses DDR so *2 + peak_bw = (effective_rate * bus_width_bits) / 8 + peak_bw_gbs = peak_bw / (1024**3) + + self.assertIsInstance(peak_bw_gbs, float) + self.assertGreater(peak_bw_gbs, 0) + + pynvml.nvmlShutdown() + + except ImportError: + self.fail( + "pynvml library not available - install with 'pip install nvidia-ml-py'" + ) + except Exception as e: + self.fail(f"pynvml integration failed: {e}") + + @unittest.skipIf( + True, "pynvml and amdsmi are not available in CI, run these tests locally" + ) + @unittest.skipIf(not torch.version.hip, "only amd") + def test_amdsmi_integration(self): + """Test 
direct amdsmi library integration.""" + try: + import amdsmi + + # Test basic AMD SMI initialization + amdsmi.amdsmi_init() + + # Test device handle retrieval (matches current implementation) + device_handle = amdsmi.amdsmi_get_processor_handles()[0] + self.assertIsNotNone(device_handle) + + # Test GPU clock info retrieval (matches current implementation) + clock_info = amdsmi.amdsmi_get_clock_info( + device_handle, amdsmi.AmdSmiClkType.SYS + ) + self.assertTrue("max_clk" in clock_info) + self.assertIsInstance(clock_info["max_clk"], int) + self.assertGreater(clock_info["max_clk"], 0) + + # Test GPU memory clock info retrieval (matches current implementation) + mem_clock_info = amdsmi.amdsmi_get_clock_info( + device_handle, amdsmi.AmdSmiClkType.MEM + ) + self.assertTrue("max_clk" in mem_clock_info) + self.assertIsInstance(mem_clock_info["max_clk"], int) + self.assertGreater(mem_clock_info["max_clk"], 0) + + amdsmi.amdsmi_shut_down() + + except ImportError: + self.fail("amdsmi library not available - install AMD SMI") + except Exception as e: + self.fail(f"amdsmi integration failed: {e}") + + @unittest.skipIf( + True, "pynvml and amdsmi are not available in CI, run these tests locally" + ) + @unittest.skipIf(torch.version.hip, "only amd") + def test_pynvml_error_handling(self): + """Test pynvml error handling for invalid operations.""" + try: + import pynvml + + pynvml.nvmlInit() + + # Test invalid device index - should raise exception + with self.assertRaises(Exception): + pynvml.nvmlDeviceGetHandleByIndex(999) # Invalid index + + pynvml.nvmlShutdown() + + except ImportError: + self.skipTest("pynvml library not available") + + @unittest.skipIf( + True, "pynvml and amdsmi are not available in CI, run these tests locally" + ) + @unittest.skipIf(not torch.version.hip, "only nvidia") + def test_amd_smi_error_handling(self): + """Test AMD SMI error handling for invalid operations.""" + # Try amdsmi only + try: + import amdsmi + + amdsmi.amdsmi_init() + + # Test invalid device index - should raise exception + with self.assertRaises(Exception): + amdsmi.amdsmi_get_processor_handle(999) # Invalid index + + amdsmi.amdsmi_shut_down() + + except ImportError: + self.skipTest("amdsmi library not available") + + @unittest.skipIf(True, "amdsmi is not available in CI, run this test locally") + @unittest.skipIf(not torch.version.hip, "only amd") + def test_amd_hardware_lookup_clock_hz(self): + """Test the _amd_hardware_lookup_clock_hz function with real AMD hardware.""" + # Test the actual function directly + clock_hz = DeviceInfo._amd_hardware_lookup_clock_hz() + + self.assertIsInstance(clock_hz, float) + self.assertGreater(clock_hz, 0) + # Clock frequency should be reasonable (between 500MHz and 5GHz) + self.assertGreater(clock_hz, 50e6) + self.assertLess(clock_hz, 5e9) + # Should return frequency in Hz, not MHz + # Most AMD clocks are in GHz range, so check it's properly converted + self.assertGreater(clock_hz, 1e8) # At least 100MHz in Hz + + @unittest.skipIf(True, "amdsmi is not available in CI, run this test locally") + @unittest.skipIf(not torch.version.hip, "only amd") + def test_amd_hardware_lookup_memory_clock_hz(self): + """Test the _amd_hardware_lookup_memory_clock_hz function with real AMD hardware.""" + try: + memory_clock_hz = DeviceInfo._amd_hardware_lookup_memory_clock_hz() + + self.assertIsInstance(memory_clock_hz, float) + self.assertGreater(memory_clock_hz, 0) + # Memory clock frequency should be reasonable (between 500MHz and 10GHz) + self.assertGreater(memory_clock_hz, 500e6) + 
self.assertLess(memory_clock_hz, 10e9) + # Should return frequency in Hz, not MHz + # Most AMD memory clocks are in GHz range, so check it's properly converted + self.assertGreater(memory_clock_hz, 1e8) # At least 100MHz in Hz + + except ImportError: + self.fail("amdsmi library not available - install AMD SMI") + except Exception: + # If there's a hardware error or no AMD device, the function should + # handle it gracefully and return None rather than crash + self.assertIsNone(DeviceInfo._amd_hardware_lookup_memory_clock_hz()) + + def test_dram_bw_hardware_calculation(self): + """Test DRAM bandwidth calculation with memory clock adjustment.""" + with ( + patch.object(DeviceInfo, "lookup_memory_clock_hz", return_value=7e9), + patch("torch.cuda.is_available", return_value=True), + patch("torch.cuda.get_device_name", return_value="AMD MI300X"), + ): + dram_bw = DeviceInfo.lookup_dram_bw_gbs(device_name="AMD MI300X") + # Uses datasheet value (5300.0 GB/s) with memory clock adjustment + # Device mapping has memory_clock_hz=5200*1e6, so ratio = 7e9 / (5200*1e6) = ~1.346 + datasheet_bw = 5300.0 + device_info = lookup_device_info("AMD MI300X") + if device_info and device_info.memory_clock_hz: + memory_clock_ratio = 7e9 / device_info.memory_clock_hz + expected_bw = datasheet_bw * memory_clock_ratio + else: + expected_bw = datasheet_bw + self.assertEqual(dram_bw, expected_bw) + + def test_dram_bw_datasheet_calculation(self): + """Test DRAM bandwidth calculation using datasheet values.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + patch.object( + DeviceInfo, "lookup_memory_clock_hz", return_value=1.4e10 / 2 + ), # Use half datasheet memory clock + ): + mock_get_device_name.return_value = "NVIDIA H100" + + dram_bw = DeviceInfo.lookup_dram_bw_gbs(device_name="NVIDIA H100") + expected_bw = 3350 / 2 # Datasheet bandwidth scaled by memory clock ratio + self.assertEqual(dram_bw, expected_bw) + + def test_dram_bw_fallback_to_datasheet(self): + """Test DRAM bandwidth fallback to datasheet when hardware lookup fails.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + patch.object( + DeviceInfo, "lookup_memory_clock_hz", return_value=1.4e10 / 2 + ), # Use half datasheet memory clock + ): + mock_get_device_name.return_value = "NVIDIA H100" + + dram_bw = DeviceInfo.lookup_dram_bw_gbs(device_name="NVIDIA H100") + expected_bw = 3350 / 2 # Datasheet bandwidth scaled by memory clock ratio + self.assertEqual(dram_bw, expected_bw) + + def test_dram_bw_memory_clock_adjustment_in_fallback(self): + """Test memory clock adjustment when falling back to datasheet.""" + custom_device_info = DeviceSpec( + memory_clock_hz=2e9, + tops={torch.float32: 100.0}, + dram_bw_gbs=1000.0, + dram_gb=16.0, + sm_count=None, + clock_hz=1.5e9, + ) + + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + patch( + "torch._inductor.analysis.device_info.lookup_device_info" + ) as mock_lookup, + ): + mock_get_device_name.return_value = "Custom Device" + mock_lookup.return_value = custom_device_info + + with patch.object(DeviceInfo, "lookup_memory_clock_hz", return_value=4e9): + dram_bw = DeviceInfo.lookup_dram_bw_gbs("Custom Device") + + datasheet_bw = 1000.0 + memory_clock_ratio = 4e9 / 2e9 + expected_bw = datasheet_bw * memory_clock_ratio + self.assertEqual(dram_bw, expected_bw) + + 
@patch("torch._inductor.analysis.device_info.lookup_device_info") + def test_dram_bw_memory_clock_adjustment_no_expected_clock(self, mock_lookup): + """Test fallback behavior when device mapping has None for memory_clock_hz.""" + device_info = DeviceSpec( + memory_clock_hz=None, + tops={torch.float32: 100.0}, + dram_bw_gbs=1000.0, + dram_gb=16.0, + sm_count=None, + clock_hz=1.5e9, + ) + mock_lookup.return_value = device_info + + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + ): + mock_get_device_name.return_value = "NVIDIA H100" + + with patch.object(DeviceInfo, "lookup_memory_clock_hz", return_value=4e9): + dram_bw = DeviceInfo.lookup_dram_bw_gbs("NVIDIA H100") + + expected_bw = 1000.0 # No memory clock adjustment + self.assertEqual(dram_bw, expected_bw) + + def test_dram_bw_memory_clock_adjustment_none_clock(self): + """Test fallback behavior when memory clock lookup returns None.""" + with ( + patch("torch.cuda.get_device_name") as mock_get_device_name, + patch("torch.cuda.is_available", return_value=True), + ): + mock_get_device_name.return_value = "NVIDIA H100" + + with patch.object(DeviceInfo, "lookup_memory_clock_hz", return_value=None): + dram_bw = DeviceInfo.lookup_dram_bw_gbs("NVIDIA H100") + + expected_bw = 3350 # Datasheet value without adjustment + self.assertEqual(dram_bw, expected_bw) + + +if __name__ == "__main__": + run_tests() diff --git a/torch/_inductor/analysis/device_info.py b/torch/_inductor/analysis/device_info.py index 39d62392ebb73..76eb9d3dd4877 100644 --- a/torch/_inductor/analysis/device_info.py +++ b/torch/_inductor/analysis/device_info.py @@ -1,6 +1,8 @@ import logging +from collections.abc import Callable, Generator +from contextlib import contextmanager from dataclasses import dataclass -from typing import Optional, Union +from typing import Any, Optional, Union import torch @@ -8,32 +10,360 @@ log = logging.getLogger(__name__) +def _get_pynvml() -> Optional[Any]: + """Get pynvml from torch.cuda if available.""" + return getattr(torch.cuda, "pynvml", None) if torch.cuda._HAS_PYNVML else None + + +def _get_amd_smi() -> Optional[Any]: + """Get AMD SMI from torch.cuda if available.""" + return getattr(torch.cuda, "amdsmi", None) if torch.cuda._HAS_PYNVML else None + + +@contextmanager +def _device_library_context( + library_getter: Callable[[], Optional[Any]], + library_name: str, + init_method: str, + shutdown_method: str, +) -> Generator[Any, None, None]: + """ + Generic context manager for device library operations. + Handles initialization, exception catching, and cleanup. 
+ + Args: + library_getter: Function that returns the library module or None + library_name: Name of the library for error messages + init_method: Name of the initialization method to call + shutdown_method: Name of the shutdown method to call + """ + library = library_getter() + if library is None: + raise RuntimeError(f"{library_name} not available") + + try: + getattr(library, init_method)() + yield library + finally: + try: + getattr(library, shutdown_method)() + except Exception: + pass + + +@contextmanager +def _nvml_context() -> Generator[Any, None, None]: + """Context manager for NVML operations.""" + with _device_library_context( + _get_pynvml, "pynvml", "nvmlInit", "nvmlShutdown" + ) as library: + yield library + + +@contextmanager +def _amd_smi_context() -> Generator[Any, None, None]: + """Context manager for AMD SMI operations.""" + with _device_library_context( + _get_amd_smi, "amdsmi", "amdsmi_init", "amdsmi_shut_down" + ) as library: + yield library + + @dataclass(frozen=True) -class DeviceInfo: +class DeviceSpec: """ Theoretical Numbers from data sheet. If two numbers are given, Tensor/Matrix Core vs not, then the higher number is reported. Sparsity is not considered. - Bandwidth numbers are tricky, because there are platform differences that may not show up in the profiler trace. - For example, """ tops: dict[Union[torch.dtype, str], float] dram_bw_gbs: float dram_gb: float + sm_count: Optional[int] + clock_hz: Optional[float] + memory_clock_hz: Optional[float] + + +class DeviceInfo: + """ + Device information lookup utility for GPU hardware introspection. + + This class provides methods to retrieve various hardware specifications + and performance characteristics of GPU devices. It supports both NVIDIA + and AMD GPUs through hardware lookup methods and falls back to datasheet + values when hardware information is not available. + + The class can provide information about: + - Streaming multiprocessor (SM) count + - Clock frequencies (core and memory) + - DRAM capacity and bandwidth + - Peak FLOPS/TOPS performance + + Methods use a two-tier lookup strategy: + 1. Hardware introspection via pynvml (NVIDIA) or AMD SMI libraries + 2. 
Fallback to predefined datasheet values for known device models + + Example usage: + device_name = torch.cuda.get_device_name() + peak_tops = DeviceInfo.lookup_tops(device_name, torch.float32) + """ + + @staticmethod + def _hardware_lookup_sm_count() -> Optional[int]: + """Get the number of streaming multiprocessors from the hardware.""" + try: + # rely on device_properties api + device_props = torch.cuda.get_device_properties(0) + return device_props.multi_processor_count + except Exception: + return None + + @staticmethod + def _hardware_lookup_clock_hz() -> Optional[float]: + """Get the clock speed in Hz from the hardware.""" + if torch.version.hip is not None: + amd_clock = DeviceInfo._amd_hardware_lookup_clock_hz() + return amd_clock + + try: + with _nvml_context() as pynvml: + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + clock_mhz = pynvml.nvmlDeviceGetMaxClockInfo( + handle, pynvml.NVML_CLOCK_SM + ) + return clock_mhz * 1e6 + except Exception: + return None + + @staticmethod + def _amd_hardware_lookup_clock_hz() -> Optional[float]: + """Get the clock speed in Hz from AMD hardware.""" + try: + with _amd_smi_context() as amd_smi: + device_handle = amd_smi.amdsmi_get_processor_handles()[0] + clock_info = amd_smi.amdsmi_get_clock_info( + device_handle, amd_smi.AmdSmiClkType.SYS + ) + return clock_info["max_clk"] * 1e6 if "max_clk" in clock_info else None + except Exception as e: + log.info("Failed to get AMD clock frequency: %s", e) + return None + + @staticmethod + def _hardware_lookup_memory_clock_hz() -> Optional[float]: + """Get the memory clock speed in Hz from the hardware.""" + if torch.version.hip is not None: + amd_memory_clock = DeviceInfo._amd_hardware_lookup_memory_clock_hz() + return amd_memory_clock + + try: + with _nvml_context() as pynvml: + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + mem_clock_mhz = pynvml.nvmlDeviceGetMaxClockInfo( + handle, pynvml.NVML_CLOCK_MEM + ) + return mem_clock_mhz * 1e6 + except Exception: + return None + + @staticmethod + def _amd_hardware_lookup_memory_clock_hz() -> Optional[float]: + """Get the memory clock speed in Hz from AMD hardware.""" + try: + with _amd_smi_context() as amd_smi: + device_handle = amd_smi.amdsmi_get_processor_handles()[0] + mem_clock_info = amd_smi.amdsmi_get_clock_info( + device_handle, amd_smi.AmdSmiClkType.MEM + ) + return ( + mem_clock_info["max_clk"] * 1e6 + if "max_clk" in mem_clock_info + else None + ) + except Exception as e: + log.info("Failed to get AMD memory clock frequency: %s", e) + return None + + @staticmethod + def _hardware_dram_gb() -> Optional[float]: + """Get the DRAM memory size in GB from the hardware.""" + try: + device_props = torch.cuda.get_device_properties(0) + # Convert from bytes to GB + return device_props.total_memory / (1024**3) + except Exception: + return None + + @staticmethod + def _generic_lookup( + device_name: str, element_name: str + ) -> Optional[Union[int, float]]: + """ + Generic lookup method for device elements. + First attempts hardware lookup, then falls back to device mapping. + + Args: + element_name: Name of the element to lookup (e.g., 'sm_count', 'clock_hz') + + Returns: + The value from hardware lookup or device mapping, or None if not available. 
+ """ + hardware_lookup_methods = { + "sm_count": DeviceInfo._hardware_lookup_sm_count, + "clock_hz": DeviceInfo._hardware_lookup_clock_hz, + "memory_clock_hz": DeviceInfo._hardware_lookup_memory_clock_hz, + "dram_gb": DeviceInfo._hardware_dram_gb, + } + + if torch.cuda.is_available() and torch.cuda.get_device_name() == device_name: + # we're on the device that we're testing, so try to look up values via hardware libraries. + hardware_method = hardware_lookup_methods.get(element_name) + if hardware_method: + hardware_value = hardware_method() + if hardware_value is not None: + return hardware_value + + # Attempt to lookup from device mapping + device_info = lookup_device_info(device_name) + if device_info is not None: + return getattr(device_info, element_name, None) + + return None + + @staticmethod + def lookup_sm_count(device_name: str) -> Optional[int]: + """Get the number of streaming multiprocessors for the current device.""" + result = DeviceInfo._generic_lookup(device_name, "sm_count") + return result if isinstance(result, int) or result is None else None + + @staticmethod + def lookup_clock_hz(device_name: str) -> Optional[float]: + """Get the clock speed in Hz for the current device.""" + return DeviceInfo._generic_lookup(device_name, "clock_hz") + + @staticmethod + def lookup_memory_clock_hz(device_name: str) -> Optional[float]: + """Get the memory clock speed in Hz for the current device.""" + return DeviceInfo._generic_lookup(device_name, "memory_clock_hz") + + @staticmethod + def lookup_dram_gb(device_name: str) -> Optional[float]: + """Get the DRAM memory size in GB for the current device.""" + return DeviceInfo._generic_lookup(device_name, "dram_gb") + + @staticmethod + def lookup_dram_bw_gbs(device_name: str) -> Optional[float]: + """ + Get the DRAM bandwidth in GB/s for the current device. + + Uses hardware lookup first, then falls back to datasheet value + scaled by memory clock ratio if available. + """ + lookupable = torch.cuda.is_available() and ( + torch.cuda.get_device_name() == device_name + ) + + # Fall back to datasheet value with memory clock scaling + device_info = lookup_device_info(device_name) + if device_info is None: + return None + + datasheet_bw = device_info.dram_bw_gbs + if datasheet_bw is None: + return None + + # Apply memory clock adjustment if current memory clock is available + if lookupable: + current_memory_clock_hz = DeviceInfo.lookup_memory_clock_hz(device_name) + if ( + current_memory_clock_hz is not None + and device_info.memory_clock_hz is not None + ): + # Scale bandwidth by memory clock ratio + expected_memory_clock_hz = device_info.memory_clock_hz + memory_clock_ratio = current_memory_clock_hz / expected_memory_clock_hz + datasheet_bw *= memory_clock_ratio + + return datasheet_bw + + @staticmethod + def lookup_tops( + device_name: str, + dtype: torch.dtype, + is_tf32: bool = False, + ) -> Optional[float]: + """ + Our best attempt to calculate the current tops. Adjust by the ratio of current clock speed to theoretical. 
+ + Returns: + Peak FLOPS as a float, or None if calculation fails + """ + lookupable = torch.cuda.is_available() and ( + torch.cuda.get_device_name() == device_name + ) + + # Use datasheet values adjusted by clock ratio + peak_ops = datasheet_tops(dtype, is_tf32) + if peak_ops is None: + return None + peak_ops *= 1e12 # Convert TOPS to FLOPS + + # Apply clock adjustment for datasheet fallback calculations + + if not torch.cuda.is_available(): + return peak_ops + + device_name = torch.cuda.get_device_name() + if device_name is None: + return peak_ops + + device_info = lookup_device_info(device_name) + if device_info is None: + return peak_ops + + if lookupable: + current_clock_hz = DeviceInfo.lookup_clock_hz(device_name) + if current_clock_hz is not None and device_info.clock_hz is not None: + # Use the expected clock speed from the device mapping for scaling + expected_clock_hz = device_info.clock_hz + clock_ratio = current_clock_hz / expected_clock_hz + peak_ops *= clock_ratio + + return peak_ops + + @staticmethod + def lookup_tops_current_device( + dtype: torch.dtype, + is_tf32: bool = False, + ) -> Optional[float]: + """ + Our best attempt to calculate the current tops. Adjust by the ratio of current clock speed to theoretical. + + Returns: + Peak FLOPS as a float, or None if calculation fails + """ + if not torch.cuda.is_available(): + return None + name: Optional[str] = torch.cuda.get_device_name() + if name is None: + return None + return DeviceInfo.lookup_tops(name, dtype, is_tf32) # Indexing is based on `torch.cuda.get_device_name()` # TODO investigate profiler support for tf32 and allow device to report correct number when it's turned on. -_device_mapping: dict[str, DeviceInfo] = { +_device_mapping: dict[str, DeviceSpec] = { # Source: # @lint-ignore https://www.nvidia.com/en-us/data-center/h100/ - "NVIDIA H100": DeviceInfo( + # These are from H100 SXM. 
+ # + "NVIDIA H100": DeviceSpec( tops={ - torch.float64: 67.0, - torch.float32: 67.5, - "torch.tf32": 156.0, + torch.float64: 34.0, + torch.float32: 67.0, + "torch.tf32": 989.0, torch.bfloat16: 1979.0, torch.float16: 1979.0, torch.float8_e8m0fnu: 3958.0, @@ -46,11 +376,17 @@ class DeviceInfo: }, dram_bw_gbs=3350, dram_gb=80, + sm_count=132, + # boost clock + clock_hz=1.98e9, + memory_clock_hz=1.4e10, + # bus: 5120 bit ), # Source: # @lint-ignore https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/ # nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf - "NVIDIA A100": DeviceInfo( + # Tensor cores enabled + SXM + "NVIDIA A100": DeviceSpec( tops={ torch.float64: 19.5, torch.float32: 19.5, @@ -58,14 +394,19 @@ class DeviceInfo: torch.float16: 312.5, # Not in datasheet: float8 torch.int8: 624.0, - "torch.tf32": 156.0, + "torch.tf32": 312.0, }, dram_bw_gbs=2039.0, dram_gb=80.0, + sm_count=108, + # boost clock + clock_hz=1410 * 1e6, + memory_clock_hz=1593 * 1e6, ), # Source: # @lint-ignore https://resources.nvidia.com/en-us-gpu-resources/l4-tensor-datasheet - "NVIDIA L4": DeviceInfo( + # @lint-ignore https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/l4/PB-11316-001_v01.pdf + "NVIDIA L4": DeviceSpec( tops={ # This is a guess, not in datasheet torch.float64: 15.1, @@ -83,11 +424,15 @@ class DeviceInfo: }, dram_bw_gbs=3350, dram_gb=24, + sm_count=58, + clock_hz=2040 * 1e6, + # bus: 192 bit + memory_clock_hz=6251 * 1e6, ), # Source: # @lint-ignore https://www.amd.com/content/dam/amd/en/documents\ # /instinct-tech-docs/data-sheets/amd-instinct-mi300a-data-sheet.pdf - "AMD MI300A": DeviceInfo( + "AMD MI300A": DeviceSpec( tops={ torch.float64: 122.6, torch.float32: 122.6, @@ -104,11 +449,15 @@ class DeviceInfo: }, dram_bw_gbs=5300.0, dram_gb=128.0, + sm_count=228, + # bus: 8192 bit + clock_hz=2100 * 1e6, + memory_clock_hz=2600 * 1e6, ), # Source: # @lint-ignore https://www.amd.com/content/dam/amd/en/documents/\ # instinct-tech-docs/data-sheets/amd-instinct-mi300x-data-sheet.pdf - "AMD MI300X": DeviceInfo( + "AMD MI300X": DeviceSpec( tops={ torch.float64: 163.4, torch.float32: 163.4, @@ -125,11 +474,14 @@ class DeviceInfo: }, dram_bw_gbs=5300.0, dram_gb=192.0, + sm_count=304, + clock_hz=2100 * 1e6, + memory_clock_hz=5200 * 1e6, ), # Source: # @lint-ignore https://www.amd.com/content/dam/amd/\ # en/documents/instinct-business-docs/product-briefs/instinct-mi210-brochure.pdf - "AMD MI210X": DeviceInfo( + "AMD MI210X": DeviceSpec( tops={ torch.float64: 45.3, torch.float32: 45.3, @@ -149,18 +501,21 @@ class DeviceInfo: # pcie4.0x16 dram_bw_gbs=1600.0, dram_gb=64.0, + sm_count=104, + clock_hz=1700 * 1e6, + memory_clock_hz=1600 * 1e6, ), } _device_mapping["AMD INSTINCT MI300X"] = _device_mapping["AMD MI300X"] _device_mapping["AMD INSTINCT MI210X"] = _device_mapping["AMD MI210X"] -def lookup_device_info(name: str) -> Optional[DeviceInfo]: +def lookup_device_info(name: str) -> Optional[DeviceSpec]: """ Problem: when diffing profiles between amd and nvidia, we don't have access to the device information of the other one. Also, since the analysis is static, we should be able to do it on another device unrelated to the recorded device. Therefore, _device_mapping statically contains the information for lots of devices. - If one is missing, please run DeviceInfo.get_device_info() and add it to _device_mapping. + If one is missing, please run DeviceSpec.get_device_info() and add it to _device_mapping. name (str): name of the device to lookup. Should map onto torch.cuda.get_device_name(). 
""" return _device_mapping.get(name, None) diff --git a/torch/_inductor/analysis/profile_analysis.py b/torch/_inductor/analysis/profile_analysis.py index 134d06528c0df..13545637ea7c6 100644 --- a/torch/_inductor/analysis/profile_analysis.py +++ b/torch/_inductor/analysis/profile_analysis.py @@ -6,7 +6,7 @@ from typing import Any, Callable, Optional, Union import torch -from torch._inductor.analysis.device_info import DeviceInfo, lookup_device_info +from torch._inductor.analysis.device_info import DeviceSpec, lookup_device_info from torch._inductor.utils import tabulate_2d, zip_dicts from torch.utils import _pytree as pytree from torch.utils._ordered_set import OrderedSet @@ -381,7 +381,7 @@ class KernelStats: class Device: name: str index: int - info: Optional[DeviceInfo] + info: Optional[DeviceSpec] stats: KernelNameMap def __repr__(self) -> str: diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index 0b09f9a67a96d..b340f33b9d873 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -60,7 +60,7 @@ import torch import torch.utils._pytree as pytree -from torch._inductor.analysis.device_info import datasheet_tops +from torch._inductor.analysis.device_info import DeviceInfo from torch._inductor.runtime.hints import DeviceProperties from torch.utils._dtype_abbrs import dtype_abbrs from torch.utils._ordered_set import OrderedSet @@ -2381,7 +2381,9 @@ def get_device_tflops(dtype: torch.dtype) -> float: We don't want to throw errors in this function. First check to see if the device is in device_info.py, then fall back to the inaccurate triton estimation. """ - ds_tops = datasheet_tops(dtype, is_tf32=torch.backends.cuda.matmul.allow_tf32) + ds_tops = DeviceInfo.lookup_tops_current_device( + dtype, is_tf32=torch.backends.cuda.matmul.allow_tf32 + ) if ds_tops is not None: return ds_tops From 4e355946749cca9e9032663db3270a5e36a4309e Mon Sep 17 00:00:00 2001 From: Yuhui Shi Date: Wed, 10 Sep 2025 21:23:37 +0000 Subject: [PATCH 068/693] [Lowering] Fix the edge case of empty subgraph split due to dataclass node (#161716) Summary: Fix the edge case by allowing `call_function` nodes with no deps as graph entry (starter_nodes) in the splitter. 
Test Plan: The test shall pass in the current diff (after fix), and fail in the parent diff (before fix) ``` buck test mode/opt //glow/fb/fx/lowering:split_tests -- test_dataclass_as_graph_entry ``` Rollback Plan: Differential Revision: D81232435 Pull Request resolved: https://github.com/pytorch/pytorch/pull/161716 Approved by: https://github.com/ezyang --- test/fx/test_fx_split.py | 75 ++++++++++++++++++++++++++++++++ torch/fx/passes/splitter_base.py | 9 ++++ 2 files changed, 84 insertions(+) diff --git a/test/fx/test_fx_split.py b/test/fx/test_fx_split.py index 6d95592fd290e..7338dd0314a15 100644 --- a/test/fx/test_fx_split.py +++ b/test/fx/test_fx_split.py @@ -1,12 +1,28 @@ # Owner(s): ["module: fx"] +import dataclasses from collections import defaultdict import torch +import torch.fx.passes.operator_support as op_support +import torch.fx.passes.splitter_base as splitter_base from torch.fx.passes.split_utils import split_by_tags from torch.testing._internal.common_utils import TestCase +@torch.jit.script +@dataclasses.dataclass +class DummyDataClass: + a: int + b: int + c: int + + +@torch.fx.wrap +def wrapped_add(_dataclass, y): + return _dataclass.c + y + + class TestFXSplit(TestCase): def test_split_preserve_node_meta(self): class TestModule(torch.nn.Module): @@ -38,6 +54,65 @@ def forward(self, x, y): self.assertIn("name", node.meta) self.assertEqual(node.meta["name"], node.name) + def test_dataclass_as_graph_entry(self): + """ + Test that splitting works when the graph entry is a dataclass instance + and a wrapped function is called with it, resulting in a call_function + node with no input dependencies. This tests the edge case fixed in D81232435 + where call_function nodes with no dependencies should be handled properly + in the starter_nodes() method. 
+ + Graph visualization: + y (input) DummyDataClass(2,3,4) (no input deps, result as a call_function_node) + \ / + \ / + wrapped_add + | + z (output) + """ # noqa: W605 + + class TestModuleWithFunctionEntry(torch.nn.Module): + def forward(self, y): + # This creates a call_function node with no input dependencies + dummy_data_class = DummyDataClass(2, 3, 4) + z = wrapped_add(dummy_data_class, y) + return z + + mod = TestModuleWithFunctionEntry() + gm = torch.fx.symbolic_trace(mod) + + # Create custom operator support to mark wrapped_add as supported + class CustomOpSupport(op_support.OperatorSupportBase): + def is_node_supported(self, submodules, node) -> bool: + return node.target == wrapped_add + + # Create a simple splitter to test the edge case + class TestSplitter(splitter_base._SplitterBase): + def __init__(self, module, sample_input, operator_support): + settings = splitter_base._SplitterSettingBase() + super().__init__(module, sample_input, operator_support, settings) + + # Create splitter instance - this tests the fix where call_function nodes + # with no input dependencies are properly handled in starter_nodes() + splitter = TestSplitter( + module=gm, + sample_input=[torch.randn(2, 3)], + operator_support=CustomOpSupport(), + ) + + # This should not raise an exception (tests the fix from D81232435) + # The fix allows call_function nodes with no dependencies as valid starter nodes + split_result = splitter() + + # Verify the splitting worked correctly + self.assertIsNotNone(split_result) + + # Test that the split module produces the same result as the original + test_input = torch.randn(2, 3) + original_result = mod(test_input) + split_module_result = split_result(test_input) + self.assertTrue(torch.equal(original_result, split_module_result)) + class TestSplitByTags(TestCase): class TestModule(torch.nn.Module): diff --git a/torch/fx/passes/splitter_base.py b/torch/fx/passes/splitter_base.py index 8a23c73785e8c..6cf708a619069 100644 --- a/torch/fx/passes/splitter_base.py +++ b/torch/fx/passes/splitter_base.py @@ -952,13 +952,22 @@ def starter_nodes(self) -> tuple[NodeSet, NodeSet]: starter_cpu_nodes: NodeSet = set() starter_acc_nodes: NodeSet = set() for node in self.module.graph.nodes: + # edge case, call_function, but with no dependencies + if node.op == "call_function" and len(node.all_input_nodes) == 0: + if node in self.acc_nodes: + starter_acc_nodes.add(node) + else: + starter_cpu_nodes.add(node) + if node.op not in {"placeholder", "get_attr"}: continue + for user in node.users: if user in self.acc_nodes: starter_acc_nodes.add(user) else: starter_cpu_nodes.add(user) + return starter_cpu_nodes, starter_acc_nodes def put_nodes_into_subgraphs(self) -> list[Subgraph]: From 864ffe12d737403230e8257b9bce0a830bd590c1 Mon Sep 17 00:00:00 2001 From: drisspg Date: Wed, 10 Sep 2025 17:26:54 +0000 Subject: [PATCH 069/693] Fix some edge cases (#162295) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` Summary 🔝 Top 5 Performance Differences (by absolute %): shape: (5, 7) ┌────────────────┬────────────────┬─────────────────────────────┬───────────────────┬──────────────────────┬───────────────────────────┬───────────┐ │ attn_type ┆ dtype ┆ shape(B,Hq,M,Hkv,N,D) ┆ TFlops BWD (base) ┆ TFlops BWD (no_peel) ┆ no_peel_speedup_over_base ┆ pct_delta │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │ 
╞════════════════╪════════════════╪═════════════════════════════╪═══════════════════╪══════════════════════╪═══════════════════════════╪═══════════╡ │ sliding_window ┆ torch.bfloat16 ┆ (2, 16, 1024, 4, 1024, 64) ┆ 56.937931 ┆ 58.960459 ┆ 1.035522 ┆ 3.552163 │ │ noop ┆ torch.bfloat16 ┆ (2, 16, 1024, 4, 1024, 128) ┆ 89.221306 ┆ 86.295642 ┆ 0.967209 ┆ -3.27911 │ │ causal ┆ torch.bfloat16 ┆ (2, 16, 4096, 4, 4096, 128) ┆ 111.552594 ┆ 114.380841 ┆ 1.025353 ┆ 2.535349 │ │ alibi ┆ torch.bfloat16 ┆ (2, 16, 1024, 16, 1024, 64) ┆ 74.830149 ┆ 76.685445 ┆ 1.024793 ┆ 2.479344 │ │ alibi ┆ torch.bfloat16 ┆ (2, 16, 1024, 4, 1024, 64) ┆ 55.279932 ┆ 56.369312 ┆ 1.019707 ┆ 1.97066 │ └────────────────┴────────────────┴─────────────────────────────┴───────────────────┴──────────────────────┴───────────────────────────┴───────────┘ 🔺 Top 5 Cases Where no_peel (change) is Faster than base (baseline): shape: (5, 7) ┌────────────────┬────────────────┬─────────────────────────────┬───────────────────┬──────────────────────┬───────────────────────────┬───────────┐ │ attn_type ┆ dtype ┆ shape(B,Hq,M,Hkv,N,D) ┆ TFlops BWD (base) ┆ TFlops BWD (no_peel) ┆ no_peel_speedup_over_base ┆ pct_delta │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │ ╞════════════════╪════════════════╪═════════════════════════════╪═══════════════════╪══════════════════════╪═══════════════════════════╪═══════════╡ │ sliding_window ┆ torch.bfloat16 ┆ (2, 16, 1024, 4, 1024, 64) ┆ 56.937931 ┆ 58.960459 ┆ 1.035522 ┆ 3.552163 │ │ causal ┆ torch.bfloat16 ┆ (2, 16, 4096, 4, 4096, 128) ┆ 111.552594 ┆ 114.380841 ┆ 1.025353 ┆ 2.535349 │ │ alibi ┆ torch.bfloat16 ┆ (2, 16, 1024, 16, 1024, 64) ┆ 74.830149 ┆ 76.685445 ┆ 1.024793 ┆ 2.479344 │ │ alibi ┆ torch.bfloat16 ┆ (2, 16, 1024, 4, 1024, 64) ┆ 55.279932 ┆ 56.369312 ┆ 1.019707 ┆ 1.97066 │ │ causal ┆ torch.bfloat16 ┆ (4, 16, 4096, 4, 4096, 64) ┆ 111.08814 ┆ 112.447047 ┆ 1.012233 ┆ 1.22327 │ └────────────────┴────────────────┴─────────────────────────────┴───────────────────┴──────────────────────┴───────────────────────────┴───────────┘ 🔻 Top 5 Cases Where no_peel (change) is Slower than base (baseline): shape: (5, 7) ┌────────────────┬────────────────┬─────────────────────────────┬───────────────────┬──────────────────────┬───────────────────────────┬───────────┐ │ attn_type ┆ dtype ┆ shape(B,Hq,M,Hkv,N,D) ┆ TFlops BWD (base) ┆ TFlops BWD (no_peel) ┆ no_peel_speedup_over_base ┆ pct_delta │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ str ┆ f64 ┆ f64 ┆ f64 ┆ f64 │ ╞════════════════╪════════════════╪═════════════════════════════╪═══════════════════╪══════════════════════╪═══════════════════════════╪═══════════╡ │ noop ┆ torch.bfloat16 ┆ (2, 16, 1024, 4, 1024, 128) ┆ 89.221306 ┆ 86.295642 ┆ 0.967209 ┆ -3.27911 │ │ causal ┆ torch.bfloat16 ┆ (4, 16, 1024, 4, 1024, 64) ┆ 78.23082 ┆ 76.693169 ┆ 0.980345 ┆ -1.965531 │ │ sliding_window ┆ torch.bfloat16 ┆ (2, 16, 2048, 4, 2048, 128) ┆ 96.95663 ┆ 95.573333 ┆ 0.985733 ┆ -1.426717 │ │ alibi ┆ torch.bfloat16 ┆ (4, 16, 2048, 4, 2048, 64) ┆ 93.373473 ┆ 92.294147 ┆ 0.988441 ┆ -1.155924 │ │ alibi ┆ torch.bfloat16 ┆ (2, 16, 2048, 4, 2048, 128) ┆ 96.95147 ┆ 96.105389 ┆ 0.991273 ┆ -0.872685 │ ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162295 Approved by: https://github.com/mlazos, https://github.com/v0i0 --- test/inductor/test_flex_attention.py | 92 ++++++++ .../flex/templates/flex_backwards.py.jinja | 197 ++++++------------ .../kernel/flex/templates/utilities.py.jinja | 6 +- 3 files changed, 162 
insertions(+), 133 deletions(-) diff --git a/test/inductor/test_flex_attention.py b/test/inductor/test_flex_attention.py index f90d7aa77229f..b8d8dc815eb9a 100644 --- a/test/inductor/test_flex_attention.py +++ b/test/inductor/test_flex_attention.py @@ -5256,6 +5256,98 @@ def test_backward_error_with_none_q_indices(self, device): ): flex_compile(q, k, v, block_mask=block_mask) + @supported_platform + @skip_on_cpu + def test_flex_attention_poisoned_rel_logits(self, device): + B = 1 + H = 1 + S = 1025 + D = 64 + q, k, v = [ + torch.randn(B, H, S, D, requires_grad=True, device=device) for _ in range(3) + ] + rel_logits = torch.randn(2 * B, H, S, S, device=device) + rel_logits[B:] = float("nan") + + def score_mod(score, b, h, q, kv): + return score + rel_logits[b, h, q, kv] + + def causal( + b: torch.Tensor, h: torch.Tensor, q: torch.Tensor, kv: torch.Tensor + ) -> torch.Tensor: + return q >= kv + + block_mask = create_block_mask(causal, B, H, S, S, device=device) + out = torch.compile(flex_attention)( + q, k, v, score_mod=score_mod, block_mask=block_mask + ) + out.sum().backward() + + assert out.isfinite().all().item() + assert q.grad.isfinite().all().item() + assert k.grad.isfinite().all().item() + assert v.grad.isfinite().all().item() + + @supported_platform + @skip_on_cpu + def test_flex_attention_poison_mod_fwd(self, device): + """Div by score should cause our edge case handiling to NaN""" + B = 1 + H = 1 + S = 257 + D = 16 + q, k, v = [ + torch.randn(B, H, S, D, requires_grad=True, device=device) for _ in range(3) + ] + + def score_mod(score, b, h, q, kv): + return 1 / score + + def causal( + b: torch.Tensor, h: torch.Tensor, q: torch.Tensor, kv: torch.Tensor + ) -> torch.Tensor: + return q >= kv + + block_mask = create_block_mask(causal, B, H, S, S, device=device) + out = torch.compile(flex_attention, backend="inductor")( + q, k, v, score_mod=score_mod, block_mask=block_mask + ) + out.sum().backward() + assert out.isfinite().all().item() + assert q.grad.isfinite().all().item() + # assert k.grad.isfinite().all().item() + assert v.grad.isfinite().all().item() + + @supported_platform + @skip_on_cpu + def test_flex_attention_poison_mod_bwd(self, device): + """log score should cause our edge case handiling for NaN in grad score""" + B = 1 + H = 1 + S = 257 + D = 16 + q, k, v = [ + torch.randn(B, H, S, D, requires_grad=True, device=device) for _ in range(3) + ] + + def score_mod(score, b, h, q, kv): + return torch.where(score > 0, torch.log(score), score) + + def causal( + b: torch.Tensor, h: torch.Tensor, q: torch.Tensor, kv: torch.Tensor + ) -> torch.Tensor: + return q >= kv + + block_mask = create_block_mask(causal, B, H, S, S, device=device) + out = torch.compile(flex_attention, backend="inductor")( + q, k, v, score_mod=score_mod, block_mask=block_mask + ) + out.sum().backward() + assert out.isfinite().all().item() + assert q.grad.isfinite().all().item() + # assert k.grad.isfinite().all().item() + assert v.grad.isfinite().all().item() + @supported_platform @skip_on_cpu def test_forward_pass_with_none_q_indices(self, device): diff --git a/torch/_inductor/kernel/flex/templates/flex_backwards.py.jinja b/torch/_inductor/kernel/flex/templates/flex_backwards.py.jinja index f5a4dd5d3c195..236f68540e4fd 100644 --- a/torch/_inductor/kernel/flex/templates/flex_backwards.py.jinja +++ b/torch/_inductor/kernel/flex/templates/flex_backwards.py.jinja @@ -312,61 +312,28 @@ def bwd_dq_inner( tl.static_assert(BLOCK_M2 % BLOCK_N2 == 0) hi = tl.minimum(sparse_kv_num_blocks * SPARSE_KV_MULTIPLE, 
tl.maximum(tl.cdiv(KV_LEN, BLOCK_N2), 1)) - if not IS_DIVISIBLE: - if hi >= 1: - for start_n in range(0, hi - 1): - dq = bwd_dq_block_mn( - {{gen_argdefs()}}, - dq, q, kT_ptrs, vT_ptrs, do, Di, lse, Q_LEN, KV_LEN, - off_z, off_hq, offs_m2, offs_n2, offs_k, offs_v, - stride_kn, stride_kd, stride_vn, stride_vd, - kv_indices, sparse_kv_num_blocks, - MATMUL_PRECISION, RCP_LN2, - IS_FULL_BLOCKS, - ) - - # Increment pointers. - offset = get_offset_for_next_block( - start_n, kv_indices, sparse_kv_num_blocks, - SPARSE_KV_BLOCK_SIZE, SPARSE_KV_MULTIPLE, BLOCK_N2, BLOCKS_ARE_CONTIGUOUS - ) - - kT_ptrs += offset * stride_kn - vT_ptrs += offset * stride_vn - - offs_n2 += offset - dq = bwd_dq_block_mn( - {{gen_argdefs()}}, - dq, q, kT_ptrs, vT_ptrs, do, Di, lse, Q_LEN, KV_LEN, - off_z, off_hq, offs_m2, offs_n2, offs_k, offs_v, - stride_kn, stride_kd, stride_vn, stride_vd, - kv_indices, sparse_kv_num_blocks, - MATMUL_PRECISION, RCP_LN2, - IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=True, - ) - else: - for start_n in range(0, hi): - dq = bwd_dq_block_mn( - {{gen_argdefs()}}, - dq, q, kT_ptrs, vT_ptrs, do, Di, lse, Q_LEN, KV_LEN, - off_z, off_hq, offs_m2, offs_n2, offs_k, offs_v, - stride_kn, stride_kd, stride_vn, stride_vd, - kv_indices, sparse_kv_num_blocks, - MATMUL_PRECISION, RCP_LN2, - IS_FULL_BLOCKS, - ) + for start_n in range(0, hi): + dq = bwd_dq_block_mn( + {{gen_argdefs()}}, + dq, q, kT_ptrs, vT_ptrs, do, Di, lse, Q_LEN, KV_LEN, + off_z, off_hq, offs_m2, offs_n2, offs_k, offs_v, + stride_kn, stride_kd, stride_vn, stride_vd, + kv_indices, sparse_kv_num_blocks, + MATMUL_PRECISION, RCP_LN2, + IS_FULL_BLOCKS, + ) - # Increment pointers. - offset = get_offset_for_next_block( - start_n, kv_indices, sparse_kv_num_blocks, - SPARSE_KV_BLOCK_SIZE, SPARSE_KV_MULTIPLE, BLOCK_N2, BLOCKS_ARE_CONTIGUOUS - ) + # Increment pointers. + offset = get_offset_for_next_block( + start_n, kv_indices, sparse_kv_num_blocks, + SPARSE_KV_BLOCK_SIZE, SPARSE_KV_MULTIPLE, BLOCK_N2, BLOCKS_ARE_CONTIGUOUS + ) - kT_ptrs += offset * stride_kn - vT_ptrs += offset * stride_vn + kT_ptrs += offset * stride_kn + vT_ptrs += offset * stride_vn - offs_n2 += offset + offs_n2 += offset return dq @@ -379,7 +346,7 @@ def bwd_dq_block_mn( stride_kn, stride_kd, stride_vn, stride_vd, kv_indices, sparse_kv_num_blocks, MATMUL_PRECISION, RCP_LN2, - IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=False, + IS_FULL_BLOCKS, ): {{gen_defines() | indent_except_first(1)}} @@ -390,10 +357,10 @@ def bwd_dq_block_mn( qk *= SM_SCALE # ~~~~~~~~~~~~~~~~~~~ Apply score modification ~~~~~~~~~~~~~~~~~~~ pre_mod_scores = qk - n = get_bounded_indices(offs_n2[None, :], KV_LEN if CHECK_BLOCK_BOUNDARY else None) + n = get_bounded_indices(offs_n2[None, :], KV_LEN if not IS_DIVISIBLE else None) # The boundary check is done for the outer loop, but here it's possible since we're iterating across N dim - # that the M reads out of bounds prior to the last loop - m = get_bounded_indices(offs_m2[:, None], Q_LEN if (not IS_DIVISIBLE or CHECK_BLOCK_BOUNDARY) else None) + # that the M reads out of bounds for the PIDS spanning the Q_LEN boundary + m = get_bounded_indices(offs_m2[:, None], Q_LEN if not IS_DIVISIBLE else None) {{ modification( subgraph_number=0, @@ -406,8 +373,13 @@ def bwd_dq_block_mn( out="qk" ) | indent_except_first(1) }} - if CHECK_BLOCK_BOUNDARY: - # Mask out the elements that are out of the KV_LEN for non divisible seqlen. 
+ + {# Note: Selective masking DQ + We load elements beyond KV_LEN w/ zero, some score mods may convert this elements to NaN + Example: lambda x, *_: 1 / score, this NaN would propagate regardless of other masking + We only need to do this on the m1 dim since these elements take part in the final reduction + for DQ #} + if not IS_DIVISIBLE: post_mod_scores = tl.where(offs_n2[None, :] < KV_LEN, post_mod_scores, float("-inf")) if not IS_FULL_BLOCKS: @@ -421,8 +393,6 @@ def bwd_dq_block_mn( n="n", ) | indent_except_first(2) }} - if CHECK_BLOCK_BOUNDARY: - mask_mod_output = tl.where(offs_n2[None, :] < KV_LEN, mask_mod_output, False) # apply mask for partial masked block post_mod_scores = tl.where(mask_mod_output, post_mod_scores, float("-inf")) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -446,7 +416,8 @@ def bwd_dq_block_mn( n="n", grad_score_mod="ds" ) | indent_except_first(1) }} - if CHECK_BLOCK_BOUNDARY: + {# See Note Selective masking DQ #} + if not IS_DIVISIBLE: grad_scores = tl.where(offs_n2[None, :] < KV_LEN, grad_scores, 0.0) # ~~~~~~~~~~~~~~~~~~~ Apply other buffer grad writes ~~~~~~~~~~~~~ @@ -467,8 +438,6 @@ def bwd_dq_block_mn( ds = grad_scores if not IS_FULL_BLOCKS: - if CHECK_BLOCK_BOUNDARY: - mask_mod_output = tl.where(offs_n2[None, :] < KV_LEN, mask_mod_output, False) # (grads) apply mask for partially unmasked block ds = tl.where(mask_mod_output, ds, 0.0) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -503,61 +472,30 @@ def bwd_dkdv_inner( do_ptrs = DO + offs_m1[:, None] * stride_dom + offs_v[None, :] * stride_dod # BLOCK_N1 must be a multiple of BLOCK_M1, otherwise the code wouldn't work. tl.static_assert(BLOCK_N1 % BLOCK_M1 == 0) - hi = tl.minimum(sparse_q_num_blocks * SPARSE_Q_MULTIPLE, tl.maximum(tl.cdiv(Q_LEN, BLOCK_M1), 1)) - - if not IS_DIVISIBLE: - if hi >= 1: - for start_m in range(0, hi - 1): - dk, dv = bwd_dkdv_block_mn( - {{gen_argdefs()}}, - dk, dv, qT_ptrs, k, v, do_ptrs, DELTA, LSE, Q_LEN, KV_LEN, - off_z, off_hq, offs_n1, offs_m1, offs_k, offs_v, - stride_qm, stride_qd, stride_dom, stride_dod, - q_indices, sparse_q_num_blocks, - MATMUL_PRECISION, RCP_LN2, - IS_FULL_BLOCKS, - ) - # Increment pointers. - offset = get_offset_for_next_block( - start_m, q_indices, sparse_q_num_blocks, - SPARSE_Q_BLOCK_SIZE, SPARSE_Q_MULTIPLE, BLOCK_M1, BLOCKS_ARE_CONTIGUOUS - ) - - qT_ptrs += offset * stride_qm - do_ptrs += offset * stride_dom - offs_m1 += offset - - dk, dv = bwd_dkdv_block_mn( - {{gen_argdefs()}}, - dk, dv, qT_ptrs, k, v, do_ptrs, DELTA, LSE, Q_LEN, KV_LEN, - off_z, off_hq, offs_n1, offs_m1, offs_k, offs_v, - stride_qm, stride_qd, stride_dom, stride_dod, - q_indices, sparse_q_num_blocks, - MATMUL_PRECISION, RCP_LN2, - IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=True, - ) - else: - for start_m in range(0, hi): - dk, dv = bwd_dkdv_block_mn( - {{gen_argdefs()}}, - dk, dv, qT_ptrs, k, v, do_ptrs, DELTA, LSE, Q_LEN, KV_LEN, - off_z, off_hq, offs_n1, offs_m1, offs_k, offs_v, - stride_qm, stride_qd, stride_dom, stride_dod, - q_indices, sparse_q_num_blocks, - MATMUL_PRECISION, RCP_LN2, - IS_FULL_BLOCKS, - ) - # Increment pointers. - offset = get_offset_for_next_block( - start_m, q_indices, sparse_q_num_blocks, - SPARSE_Q_BLOCK_SIZE, SPARSE_Q_MULTIPLE, BLOCK_M1, BLOCKS_ARE_CONTIGUOUS - ) + # The minimum is needed to handle the case where we run with a super large + # SPARSE_BLOCK_SIZE (i.e. no block-mask!) 
+ hi = tl.minimum(sparse_q_num_blocks * SPARSE_Q_MULTIPLE, tl.maximum(tl.cdiv(Q_LEN, BLOCK_M1), 1)) - qT_ptrs += offset * stride_qm - do_ptrs += offset * stride_dom + for start_m in range(0, hi): + dk, dv = bwd_dkdv_block_mn( + {{gen_argdefs()}}, + dk, dv, qT_ptrs, k, v, do_ptrs, DELTA, LSE, Q_LEN, KV_LEN, + off_z, off_hq, offs_n1, offs_m1, offs_k, offs_v, + stride_qm, stride_qd, stride_dom, stride_dod, + q_indices, sparse_q_num_blocks, + MATMUL_PRECISION, RCP_LN2, + IS_FULL_BLOCKS, + ) + # Increment pointers. + offset = get_offset_for_next_block( + start_m, q_indices, sparse_q_num_blocks, + SPARSE_Q_BLOCK_SIZE, SPARSE_Q_MULTIPLE, BLOCK_M1, BLOCKS_ARE_CONTIGUOUS + ) - offs_m1 += offset + qT_ptrs += offset * stride_qm + do_ptrs += offset * stride_dom + offs_m1 += offset return dk, dv @@ -570,7 +508,7 @@ def bwd_dkdv_block_mn( stride_qm, stride_qd, stride_dom, stride_dod, q_indices, sparse_q_num_blocks, MATMUL_PRECISION, RCP_LN2, - IS_FULL_BLOCKS, CHECK_BLOCK_BOUNDARY=False, + IS_FULL_BLOCKS, ): {{gen_defines() | indent_except_first(1) }} @@ -586,10 +524,10 @@ def bwd_dkdv_block_mn( if not PRESCALE_QK: qkT *= SM_SCALE # ~~~~~~~~~~~~~~~~~~~ Apply score modification ~~~~~~~~~~~~~~~~~~~ - m = get_bounded_indices(offs_m1[None, :], Q_LEN if CHECK_BLOCK_BOUNDARY else None) + m = get_bounded_indices(offs_m1[None, :], Q_LEN if not IS_DIVISIBLE else None) # The boundary check is done for the outer loop, but here it's possible since we're iterating across M dim - # that the n reads out of bounds prior to the last loop - n = get_bounded_indices(offs_n1[:, None], KV_LEN if (not IS_DIVISIBLE or CHECK_BLOCK_BOUNDARY) else None) + # that the n reads out of bounds for the PIDS spanning the KV_LEN boundary + n = get_bounded_indices(offs_n1[:, None], KV_LEN if not IS_DIVISIBLE else None) pre_mod_scores = qkT {{ modification( @@ -603,22 +541,23 @@ def bwd_dkdv_block_mn( out="qkT" ) | indent_except_first(1) }} - if CHECK_BLOCK_BOUNDARY: - # Mask out the elements that are out of the KV_LEN for non divisible seqlen. 
- post_mod_scores = tl.where(offs_n1[:, None] < KV_LEN, post_mod_scores, float("-inf")) + {# Note: Selective masking DK/DV + We load elements beyond Q_LEN w/ zero, some score mods may convert this elements to NaN + Example: lambda x, *_: 1 / score, this NaN would propagate regardless of other masking + We only need to do this on the m1 dim since these elements take part in the final reduction + for DK/DV #} + if not IS_DIVISIBLE: + post_mod_scores = tl.where(offs_m1[None, :] < Q_LEN, post_mod_scores, float("-inf")) if not IS_FULL_BLOCKS: {{ modification( subgraph_number=2, output_name="mask_mod_output", - score="qkT", b="off_z", h="off_hq", m="m", n="n", ) | indent_except_first(2) }} - if CHECK_BLOCK_BOUNDARY: - mask_mod_output = tl.where(offs_n1[:, None] < KV_LEN, mask_mod_output, False) # (grads) apply mask for fully masked block post_mod_scores = tl.where(mask_mod_output, post_mod_scores, float("-inf")) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -648,6 +587,10 @@ def bwd_dkdv_block_mn( grad_score_mod="dsT" ) | indent_except_first(1) }} + {# See Note: Selective masking DK/DV#} + if not IS_DIVISIBLE: + grad_scores = tl.where(offs_m1[None, :] < Q_LEN, grad_scores, 0.0) + # ~~~~~~~~~~~~~~~~~~~ Apply other buffer grad writes ~~~~~~~~~~~~~ if not WRITE_DQ: idx_b = off_z @@ -667,14 +610,8 @@ def bwd_dkdv_block_mn( grad_score_mod="dsT" ) | indent_except_first(2) }} # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - if CHECK_BLOCK_BOUNDARY: - grad_scores = tl.where(offs_n1[:, None] < KV_LEN, grad_scores, 0.0) - dsT = grad_scores if not IS_FULL_BLOCKS: - if CHECK_BLOCK_BOUNDARY: - mask_mod_output = tl.where(offs_n1[:, None] < KV_LEN, mask_mod_output, False) # (grads) apply mask for partially unmasked block dsT = tl.where(mask_mod_output, dsT, 0.0) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/torch/_inductor/kernel/flex/templates/utilities.py.jinja b/torch/_inductor/kernel/flex/templates/utilities.py.jinja index 7e2367e4f2692..0c40b43277f8a 100644 --- a/torch/_inductor/kernel/flex/templates/utilities.py.jinja +++ b/torch/_inductor/kernel/flex/templates/utilities.py.jinja @@ -42,7 +42,7 @@ def load_checked_2d( IS_DIVISIBLE_M: tl.constexpr, IS_DIVISIBLE_N: tl.constexpr, M_LEN: tl.constexpr, - N_DIM: tl.constexpr, + N_LEN: tl.constexpr, ): # Calculate final pointer if strides are provided if stride_m is not None and stride_n is not None: @@ -50,9 +50,9 @@ def load_checked_2d( # Handle all masking cases if not IS_DIVISIBLE_M and not IS_DIVISIBLE_N: - return tl.load(ptr, mask=(offs_m[:, None] < M_LEN) & (offs_n[None, :] < N_DIM), other=0.0) + return tl.load(ptr, mask=(offs_m[:, None] < M_LEN) & (offs_n[None, :] < N_LEN), other=0.0) elif IS_DIVISIBLE_M and not IS_DIVISIBLE_N: - return tl.load(ptr, mask=(offs_n[None, :] < N_DIM), other=0.0) + return tl.load(ptr, mask=(offs_n[None, :] < N_LEN), other=0.0) elif not IS_DIVISIBLE_M and IS_DIVISIBLE_N: return tl.load(ptr, mask=(offs_m[:, None] < M_LEN), other=0.0) else: # Both divisible From 5dbee5691cc63f64beaec757e343386d50792ccc Mon Sep 17 00:00:00 2001 From: eqy Date: Wed, 10 Sep 2025 21:39:32 +0000 Subject: [PATCH 070/693] [cuDNN][Convolution][TF32][64bit] Add `tf32_on_and_off` decorator to conv3d 64bit test (#161004) cuDNN has new generated kernels that can use TF32. 
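For context, `tf32_on_and_off(0.005)` (defined in `torch.testing._internal.common_cuda` and already available in this test file) runs the decorated test with TF32 both disallowed and allowed, relaxing the comparison tolerance for the TF32 pass. A rough, hand-rolled sketch of that behavior (not the actual implementation):

```python
import torch

def run_with_and_without_tf32(test_body, tf32_tol=0.005, default_tol=1e-5):
    # TF32 keeps ~10 mantissa bits, so conv/matmul results that hit TF32 kernels
    # need a looser tolerance than full fp32.
    for allow in (False, True):
        torch.backends.cuda.matmul.allow_tf32 = allow
        torch.backends.cudnn.allow_tf32 = allow
        test_body(tol=tf32_tol if allow else default_tol)
```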
Pull Request resolved: https://github.com/pytorch/pytorch/pull/161004 Approved by: https://github.com/janeyx99, https://github.com/Skylion007 --- test/nn/test_convolution.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/nn/test_convolution.py b/test/nn/test_convolution.py index 2687ab5d089ce..abc3cffe3d4df 100644 --- a/test/nn/test_convolution.py +++ b/test/nn/test_convolution.py @@ -4041,6 +4041,7 @@ def test_conv_double_backward_strided_with_3D_input_and_weight(self, device): @onlyCUDA @largeTensorTest("40GB") @largeTensorTest("24GB", "cpu") + @tf32_on_and_off(0.005) def test_conv3d_64bit_indexing(self, device): x = torch.rand(1, 32, 512, 512, 256) m = torch.nn.Conv3d(32, 1, kernel_size=1, padding=0, stride=1, bias=False) From 31c25c7d01c75c2cb823044957cf9f60c7050b51 Mon Sep 17 00:00:00 2001 From: Michael Lazos Date: Tue, 9 Sep 2025 15:26:23 -0700 Subject: [PATCH 071/693] [Cutlass] Add tanh activation and test case for activations (#162535) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162535 Approved by: https://github.com/henrylhtsang --- test/inductor/test_cutlass_backend.py | 28 ++++++++++++++++++- .../codegen/cuda/cutlass_python_evt.py | 2 +- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/test/inductor/test_cutlass_backend.py b/test/inductor/test_cutlass_backend.py index b807df5d6691c..b3d253122ba8f 100644 --- a/test/inductor/test_cutlass_backend.py +++ b/test/inductor/test_cutlass_backend.py @@ -107,13 +107,15 @@ def _check_if_instances_equal(op1, op2) -> bool: return True -un_ops_under_test = [torch.relu] +un_ops_under_test = [torch.relu, torch.tanh] bin_ops_under_test = [torch.add, torch.mul, torch.sub, torch.div] evt_all_ops = parametrize( "op", un_ops_under_test + bin_ops_under_test, name_fn=lambda f: f.__name__ ) +evt_un_ops = parametrize("op", un_ops_under_test, name_fn=lambda f: f.__name__) + evt_bin_ops = parametrize("op", bin_ops_under_test, name_fn=lambda f: f.__name__) evt_all_shapes = parametrize("shape", itertools.product([512, 1024], repeat=2)) @@ -1976,6 +1978,30 @@ def forward(self, a, b, extra_args): ) torch.testing.assert_close(result, ref_result) + @unittest.skipIf(not SM90OrLater, "need sm_90") + @use_evt_config + @evt_un_ops + def test_evt_activations(self, op): + class TestModel(torch.nn.Module): + def forward(self, a, b, extra_args): + acc = a @ b + return acc, op(acc, *extra_args) + + M = 1024 + N = 512 + a = torch.ones(M, N).cuda().half() + b = torch.ones(N, N).cuda().half().t() + extra_args = gen_args(op, (M, N)) + model = TestModel().cuda() + + result = torch.compile(model)(a, b, extra_args) + ref_result = model(a, b, extra_args) + + self.assertEqual( + torch._dynamo.utils.counters["inductor"]["cuda_epilogue_fusion_counter"], 1 + ) + torch.testing.assert_close(result, ref_result) + @unittest.skipIf(not SM90OrLater, "need sm_90") @use_evt_config @evt_all_ops diff --git a/torch/_inductor/codegen/cuda/cutlass_python_evt.py b/torch/_inductor/codegen/cuda/cutlass_python_evt.py index ca5e6031b19cd..102b61cbb18fc 100644 --- a/torch/_inductor/codegen/cuda/cutlass_python_evt.py +++ b/torch/_inductor/codegen/cuda/cutlass_python_evt.py @@ -96,7 +96,7 @@ def sub(x0: str, x1: str) -> str: @staticmethod def tanh(x0: str) -> str: - raise NotImplementedError("tanh is not supported in CUTLASS python evt") + return CutlassEVTOpsMixIn._prefix_un_op("tanh", x0) class MockCutlassHandler(CutlassEVTOpsMixIn, WrapperHandler): From d8e6b2fddc54c748d976e8f0ebe4b63ebe36d85b Mon Sep 17 00:00:00 2001 From: Michael Lazos Date: Tue, 9 Sep 2025 
15:26:26 -0700 Subject: [PATCH 072/693] [Cutlass] Add exp and sigmoid activations (#162536) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162536 Approved by: https://github.com/henrylhtsang, https://github.com/eellison ghstack dependencies: #162535 --- test/inductor/test_cutlass_backend.py | 2 +- torch/_inductor/codegen/cuda/cutlass_python_evt.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/test/inductor/test_cutlass_backend.py b/test/inductor/test_cutlass_backend.py index b3d253122ba8f..d0618886660a6 100644 --- a/test/inductor/test_cutlass_backend.py +++ b/test/inductor/test_cutlass_backend.py @@ -107,7 +107,7 @@ def _check_if_instances_equal(op1, op2) -> bool: return True -un_ops_under_test = [torch.relu, torch.tanh] +un_ops_under_test = [torch.relu, torch.tanh, torch.exp, torch.sigmoid] bin_ops_under_test = [torch.add, torch.mul, torch.sub, torch.div] evt_all_ops = parametrize( diff --git a/torch/_inductor/codegen/cuda/cutlass_python_evt.py b/torch/_inductor/codegen/cuda/cutlass_python_evt.py index 102b61cbb18fc..72108b29b3cb0 100644 --- a/torch/_inductor/codegen/cuda/cutlass_python_evt.py +++ b/torch/_inductor/codegen/cuda/cutlass_python_evt.py @@ -88,7 +88,7 @@ def relu(x0: str) -> str: @staticmethod def sigmoid(x0: str) -> str: - raise NotImplementedError("sigmoid is not supported in CUTLASS python evt") + return CutlassEVTOpsMixIn._prefix_un_op("sigmoid", x0) @staticmethod def sub(x0: str, x1: str) -> str: @@ -98,6 +98,10 @@ def sub(x0: str, x1: str) -> str: def tanh(x0: str) -> str: return CutlassEVTOpsMixIn._prefix_un_op("tanh", x0) + @staticmethod + def exp(x0: str) -> str: + return CutlassEVTOpsMixIn._prefix_un_op("exp", x0) + class MockCutlassHandler(CutlassEVTOpsMixIn, WrapperHandler): """Passthrough handler for cutlass ops, used for running epilogue nodes for memory planning""" From f4aeceaa9d3bdb765fd181fbd52774cea67959eb Mon Sep 17 00:00:00 2001 From: eellison Date: Wed, 10 Sep 2025 09:59:01 -0700 Subject: [PATCH 073/693] Use upper bound for persistent rblock (#162441) Previously, we were using 128 and increasing to upper bound. We should be setting at the upper bound and raising to next power of 2. 
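A small worked example of the difference (simplified helper below; the real logic reads the bound via `bound_sympy(rnumel).upper`): for a reduction dim marked dynamic with `max=62`, the old code started at 128 and only grew, so it emitted `R0_BLOCK = 128`; taking the upper bound and rounding up to the next power of two gives 64.

```python
def next_power_of_2(n: int) -> int:
    return 1 << (n - 1).bit_length()

upper_bound = 62  # e.g. torch._dynamo.mark_dynamic(x, 0, min=1, max=62)
print(next_power_of_2(upper_bound))  # 64 -> persistent R0_BLOCK = 64

# old behavior (sketch): start at 128 and double until it covers the bound
val = 128
while val < upper_bound:
    val *= 2
print(val)  # 128 -> wasted lanes for a 62-element reduction
```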
Differential Revision: [D81984103](https://our.internmc.facebook.com/intern/diff/D81984103) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162441 Approved by: https://github.com/PaulZhang12 --- .../test_torchinductor_dynamic_shapes.py | 31 +++++++++++++++++++ torch/_inductor/codegen/triton.py | 17 +++++++--- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/test/inductor/test_torchinductor_dynamic_shapes.py b/test/inductor/test_torchinductor_dynamic_shapes.py index 57d263a63e8ac..a72d984939627 100644 --- a/test/inductor/test_torchinductor_dynamic_shapes.py +++ b/test/inductor/test_torchinductor_dynamic_shapes.py @@ -12,6 +12,7 @@ import torch.library from torch._dynamo.testing import CompileCounterWithBackend, make_test_cls_with_patches from torch._inductor import metrics +from torch._inductor.choices import InductorChoices from torch._inductor.codegen.wrapper import PythonWrapperCodegen from torch._inductor.test_case import TestCase from torch._inductor.utils import run_and_get_code @@ -1085,6 +1086,36 @@ def fn(x, y): self.assertEqual(fn(x, 4.0), fn_opt(x, 4.0)) self.assertEqual(cnt.frame_count, 2) + @onlyOn(GPU_TYPE) + def test_dynamic_rblock_bounds(self): + class ForcePersistent(InductorChoices): + @staticmethod + def should_use_cooperative_reduction(*args, **kwargs) -> bool: + return False + + @staticmethod + def should_use_persistent_reduction(*args, **kwargs) -> bool: + return True + + def fn(x): + return x.sum() + + x = torch.rand([31], device=GPU_TYPE) + + with V.set_choices_handler(ForcePersistent()): + torch._dynamo.mark_dynamic(x, 0, min=1, max=62) + fn_c = torch.compile(fn) + actual, source_codes = run_and_get_code(fn_c, x) + self.assertEqual(fn(x), actual) + FileCheck().check("R0_BLOCK: tl.constexpr = 64").run(source_codes[0]) + torch._dynamo.reset() + + torch._dynamo.mark_dynamic(x, 2, min=1, max=64) + fn_c = torch.compile(fn) + actual, source_codes = run_and_get_code(fn_c, x) + self.assertEqual(fn(x), actual) + FileCheck().check("R0_BLOCK: tl.constexpr = 64").run(source_codes[0]) + def test_unspecialized_float_dynamic(self): def fn(x, y): return x * y diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index cdf5b5d3fcb32..bf22d5ec587a0 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -26,6 +26,7 @@ from torch._prims_common import is_integer_dtype from torch.utils._ordered_set import OrderedSet from torch.utils._sympy.functions import CeilDiv, FloorDiv, ModularIndexing +from torch.utils._sympy.value_ranges import bound_sympy from torch.utils._triton import has_triton_package, has_triton_stable_tma_api from ...utils._sympy.symbol import free_symbol_is_type, prefix_str, symbol_is_type, SymT @@ -4355,11 +4356,17 @@ def _get_persistent_RBLOCK(rnumel): val = int(rnumel) val = next_power_of_2(val) else: - val = 128 - while not V.graph.sizevars.statically_known_leq(rnumel, val): - if val > 16 * 1024: - raise ValueError(f"Failed to find static RBLOCK for {rnumel}") - val *= 2 + val = bound_sympy(rnumel).upper + assert isinstance(val, int) or val.is_constant() + + if val == torch.utils._sympy.numbers.IntInfinity(): + raise ValueError(f"Failed to find static RBLOCK for {rnumel}") + + val = next_power_of_2(int(val)) + + if val > 16 * 1024: + raise ValueError(f"Failed to find static RBLOCK for {rnumel}") + return val @staticmethod From e0c910149ca1de603e216bf6f60eff1ab9d72665 Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Wed, 10 Sep 2025 22:59:37 +0000 Subject: [PATCH 074/693] 
Build fbgemm_gpu for TORCH_CUDA_ARCH_LIST=10.0 and CUDA 12.8 and 12.9 (#162544) ## Summary - pytorch is not built for *a variants of SM architectures, due to non-portability. However, we need fbgemm_gpu kernels built for sm100a (see #162209) ## Changes - **Setting USE_FBGEMM_GENAI for CUDA builds**: fbgemm_gpu builds for sm100a if using CUDA 12.8 or 12.9 ([source](https://github.com/pytorch/FBGEMM/blob/2033a0a08fbc08f83a1c8da5717546407f9bd972/.github/scripts/nova_dir.bash#L29-L32)), so I follow the same rule here. - **Extra nvcc flags**: if USE_FBGEMM_GENAI and USE_CUDA are set, we add extra nvcc flags for sm100a ## Test plan Test build: ``` echo $CUDA_HOME /usr/local/cuda-12.9 export TORCH_CUDA_ARCH_LIST=10.0 python -m pip install --no-build-isolation -v -e . ``` Check build logs: ``` CMake Warning at CMakeLists.txt:901 (message): Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a ``` Run unit tests: - `pytest test/test_matmul_cuda.py -k test_mxfp8_scaled_grouped_mm` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162544 Approved by: https://github.com/drisspg --- CMakeLists.txt | 6 +++--- aten/src/ATen/CMakeLists.txt | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 105e0db67ecd1..d367b078604e7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -902,9 +902,9 @@ IF(USE_FBGEMM_GENAI AND USE_ROCM AND NOT "gfx942" IN_LIST PYTORCH_ROCM_ARCH) set(USE_FBGEMM_GENAI off) endif() -# Set USE_FBGEMM_GENAI to ON for CUDA build on SM100 -if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0a") - message(WARNING "Setting USE_FBGEMM_GENAI to ON for CUDA build on SM100") +# Set USE_FBGEMM_GENAI to ON for CUDA build on SM100. +if(USE_CUDA AND "$ENV{TORCH_CUDA_ARCH_LIST}" MATCHES "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) + message(STATUS "Setting USE_FBGEMM_GENAI to ON, doing CUDA build for SM100a") set(USE_FBGEMM_GENAI ON) endif() diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt index 6c095680733fe..aa250c8b7fae9 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt @@ -265,6 +265,14 @@ IF(USE_FBGEMM_GENAI) "${FBGEMM_GENAI_SRCS}/cutlass_extensions/**/*.cu") list(FILTER fbgemm_genai_native_cuda_cu INCLUDE REGEX ${FBGEMM_CUTLASS_KERNELS_REGEX}) + # PyTorch is not built for 10.0a in CI, due to lack of portability, + # so we need to explicitly build these files for 10.0a. + foreach(cu_file ${fbgemm_genai_native_cuda_cu}) + _BUILD_FOR_ADDITIONAL_ARCHS( + "${cu_file}" + "100a") + endforeach() + file(GLOB_RECURSE fbgemm_genai_native_cuda_cpp "${FBGEMM_GENAI_SRCS}/common/*.cpp" ) From 36338fc7f22a65d3c4b53b3933e22ce13ae4ce4f Mon Sep 17 00:00:00 2001 From: Ben Niu Date: Wed, 10 Sep 2025 23:16:58 +0000 Subject: [PATCH 075/693] Relax fences for intrusive ptr's refcnt (#162072) Summary: Relax fences for intrusive ptr's refcnt dec op for performance testing. lock needs acquire when the op succeeds and relaxed if the op is not. In addition, the expire call and the following refcnt reads were merged to remove one extra read. incref does not need any fences because the caller should already have a valid reference. use_count follows the same reasoning. decref only needs a release fence to make sure every write op prior to it has finished. When the refcnt goes to zero, there should be a acquire fence to make sure no read op reads stale data before the object is destructed. 
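A hand-rolled C++ sketch of the orderings described above (illustration only, not the repository code):

```cpp
#include <atomic>
#include <cstdint>

struct RefCounted {
  std::atomic<uint32_t> refcount{1};

  // Caller already holds a reference, so no ordering beyond the increment itself.
  void incref() { refcount.fetch_add(1, std::memory_order_relaxed); }

  void decref() {
    // Release publishes all prior writes to whichever thread does the final decrement.
    if (refcount.fetch_sub(1, std::memory_order_release) == 1) {
      // Final owner: acquire before destruction so no stale reads of the object.
      std::atomic_thread_fence(std::memory_order_acquire);
      delete this;
    }
  }
};
```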
However, microbenchmark showed that the optimal fence for decref is not performing noticeably better than the current decref with acq-rel, so we keep decref as-is. This change should have no material impact on x86, but for Arm64 (and other CPUs with weak memory models), it should boost performance. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162072 Approved by: https://github.com/swolchok, https://github.com/yfeldblum --- c10/util/intrusive_ptr.h | 41 ++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/c10/util/intrusive_ptr.h b/c10/util/intrusive_ptr.h index 288b19df0a6c8..de81d4c1b7df3 100644 --- a/c10/util/intrusive_ptr.h +++ b/c10/util/intrusive_ptr.h @@ -196,20 +196,25 @@ TTarget* assign_ptr_(TTarget* rhs) { } } -// Increment needs to be acquire-release to make use_count() and -// unique() reliable. +// The only requirement for refcount increment is that it happens-before +// decrement, so no additional memory ordering is needed. inline uint32_t atomic_refcount_increment(std::atomic& refcount) { - return refcount.fetch_add(1, std::memory_order_acq_rel) + 1; + return refcount.fetch_add(1, std::memory_order_relaxed) + 1; } -// weak_use_count() is only used for testing, so we don't need it to -// be reliable. Relaxed should be fine. inline uint32_t atomic_weakcount_increment(std::atomic& weakcount) { return weakcount.fetch_add(1, std::memory_order_relaxed) + 1; } -// Both decrements need to be acquire-release for correctness. See -// e.g. std::shared_ptr implementation. +// The requirement is that all modifications to the managed object happen-before +// invocation of the managed object destructor, and that allocation of the +// managed object storage happens-before deallocation of the storage. +// +// To get this ordering, all non-final decrements must synchronize-with the +// final decrement. So all non-final decrements have to store-release while the +// final decrement has to load-acquire, either directly or with the help of +// fences. But it's easiest just to have all decrements be acq-rel. And it turns +// out, on modern architectures and chips, it's also fastest. inline uint32_t atomic_refcount_decrement(std::atomic& refcount) { return refcount.fetch_sub(1, std::memory_order_acq_rel) - 1; } @@ -332,7 +337,7 @@ class intrusive_ptr final { intrusive_ptr() noexcept : intrusive_ptr(NullType::singleton(), raw::DontIncreaseRefcount{}) {} - intrusive_ptr(std::nullptr_t) noexcept + /* implicit */ intrusive_ptr(std::nullptr_t) noexcept : intrusive_ptr(NullType::singleton(), raw::DontIncreaseRefcount{}) {} // This constructor will not increase the ref counter for you. @@ -445,14 +450,14 @@ class intrusive_ptr final { if (target_ == NullType::singleton()) { return 0; } - return target_->refcount_.load(std::memory_order_acquire); + return target_->refcount_.load(std::memory_order_relaxed); } uint32_t weak_use_count() const noexcept { if (target_ == NullType::singleton()) { return 0; } - return target_->weakcount_.load(std::memory_order_acquire); + return target_->weakcount_.load(std::memory_order_relaxed); } bool unique() const noexcept { @@ -851,14 +856,14 @@ class weak_intrusive_ptr final { return 0; } return target_->refcount_.load( - std::memory_order_acquire); // refcount, not weakcount! + std::memory_order_relaxed); // refcount, not weakcount! 
} uint32_t weak_use_count() const noexcept { if (target_ == NullType::singleton()) { return 0; } - return target_->weakcount_.load(std::memory_order_acquire); + return target_->weakcount_.load(std::memory_order_relaxed); } bool expired() const noexcept { @@ -866,18 +871,22 @@ class weak_intrusive_ptr final { } intrusive_ptr lock() const noexcept { - if (expired()) { + if (target_ == NullType::singleton()) { return intrusive_ptr(); } else { - auto refcount = target_->refcount_.load(std::memory_order_seq_cst); + auto refcount = target_->refcount_.load(std::memory_order_relaxed); do { if (refcount == 0) { // Object already destructed, no strong references left anymore. // Return nullptr. return intrusive_ptr(); } - } while ( - !target_->refcount_.compare_exchange_weak(refcount, refcount + 1)); + } while (!target_->refcount_.compare_exchange_weak( + refcount, + refcount + 1, + std::memory_order_acquire, + std::memory_order_relaxed)); + return intrusive_ptr( target_, raw::DontIncreaseRefcount{}); } From bb1d53bc47109c7c97e5fa072280d05b04e023e5 Mon Sep 17 00:00:00 2001 From: Ting Lu Date: Thu, 11 Sep 2025 00:03:47 +0000 Subject: [PATCH 076/693] [CD] CUDA 13 specific followup changes (#162455) Follow up for CUDA 13 bring up https://github.com/pytorch/pytorch/issues/159779 sm50-70 should not be added to sbsa build arch list, as previous archs had no support for arm. remove platform_machine from PYTORCH_EXTRA_INSTALL_REQUIREMENTS Pull Request resolved: https://github.com/pytorch/pytorch/pull/162455 Approved by: https://github.com/atalman --- .ci/aarch64_linux/aarch64_ci_build.sh | 7 +- .ci/aarch64_linux/aarch64_wheel_ci_build.py | 7 +- .../scripts/generate_binary_build_matrix.py | 90 +++++++++---------- ...linux-aarch64-binary-manywheel-nightly.yml | 42 ++++----- .../generated-linux-binary-manywheel-main.yml | 2 +- ...nerated-linux-binary-manywheel-nightly.yml | 42 ++++----- 6 files changed, 95 insertions(+), 95 deletions(-) diff --git a/.ci/aarch64_linux/aarch64_ci_build.sh b/.ci/aarch64_linux/aarch64_ci_build.sh index 9878c4afa3bfb..a0eb0b72df2b3 100644 --- a/.ci/aarch64_linux/aarch64_ci_build.sh +++ b/.ci/aarch64_linux/aarch64_ci_build.sh @@ -5,9 +5,9 @@ GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-} # Set CUDA architecture lists to match x86 build_cuda.sh if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then - export TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;8.0;9.0" + export TORCH_CUDA_ARCH_LIST="8.0;9.0" elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then - export TORCH_CUDA_ARCH_LIST="7.0;8.0;9.0;10.0;12.0" + export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0" elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX" fi @@ -41,9 +41,6 @@ else echo "Bundling CUDA libraries with wheel for aarch64." else echo "Using nvidia libs from pypi for aarch64." 
- # Fix platform constraints in PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64 - # Replace 'platform_machine == "x86_64"' with 'platform_machine == "aarch64"' - export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS//platform_machine == \'x86_64\'/platform_machine == \'aarch64\'}" echo "Updated PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64: $PYTORCH_EXTRA_INSTALL_REQUIREMENTS" export USE_NVIDIA_PYPI_LIBS=1 fi diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py index dc75516fe1294..b7a895fe05869 100755 --- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py +++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py @@ -170,7 +170,8 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: ] # CUDA version-specific libraries - if "130" in desired_cuda: + if "13" in desired_cuda: + minor_version = desired_cuda[-1] version_specific_libs = [ "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13", "/usr/local/cuda/lib64/libcublas.so.13", @@ -180,7 +181,7 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: "/usr/local/cuda/lib64/libcusolver.so.12", "/usr/local/cuda/lib64/libnvJitLink.so.13", "/usr/local/cuda/lib64/libnvrtc.so.13", - "/usr/local/cuda/lib64/libnvrtc-builtins.so.13.0", + f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}", ] elif "12" in desired_cuda: # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9") @@ -196,6 +197,8 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: "/usr/local/cuda/lib64/libnvrtc.so.12", f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}", ] + else: + raise ValueError(f"Unsupported CUDA version: {desired_cuda}.") # Combine all libraries libs_to_copy = common_libs + version_specific_libs diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index a3e65b340f649..e57c2d5ef0749 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -43,55 +43,55 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { "12.6": ( - "nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 
'x86_64'" + "nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | " + "nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | " + "nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | " + "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | " + "nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | " + "nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | " + "nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | " + "nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | " + "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | " + "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " + "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " + "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | " + "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | " + "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | " + "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux'" ), "12.8": ( - "nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'" + "nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | " + "nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | " + "nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | " + "nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | " + "nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | " + "nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | " + "nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | " + "nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | " + "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | " + "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | " + "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | " + "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | " + "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | " + "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | " + "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux'" ), "13.0": ( - "nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " - 
"nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | " - "nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'" + "nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | " + "nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | " + "nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | " + "nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | " + "nvidia-cublas==13.0.0.19; platform_system == 'Linux' | " + "nvidia-cufft==12.0.0.15; platform_system == 'Linux' | " + "nvidia-curand==10.4.0.35; platform_system == 'Linux' | " + "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | " + "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | " + "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | " + "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | " + "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | " + "nvidia-nvtx==13.0.39; platform_system == 'Linux' | " + "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | " + "nvidia-cufile==1.15.0.42; platform_system == 'Linux'" ), "xpu": ( "intel-cmplr-lib-rt==2025.2.1 | " diff --git a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml index 8bbcf1138e46d..8a3c0840f8430 100644 --- a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml @@ -132,7 +132,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_10-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -178,7 +178,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_10-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | 
nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -224,7 +224,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_10-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -335,7 +335,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_11-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' 
and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -381,7 +381,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_11-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -427,7 +427,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_11-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; 
platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -538,7 +538,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_12-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -584,7 +584,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_12-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -630,7 +630,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_12-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; 
platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -741,7 +741,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -787,7 +787,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -833,7 +833,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -944,7 +944,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13t-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 
'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -990,7 +990,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13t-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1036,7 +1036,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_13t-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1147,7 +1147,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; 
platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1193,7 +1193,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1239,7 +1239,7 @@ jobs: 
ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1350,7 +1350,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14t-cuda-aarch64-12_6 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' 
and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1396,7 +1396,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14t-cuda-aarch64-12_8 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | 
nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} @@ -1442,7 +1442,7 @@ jobs: ALPINE_IMAGE: "arm64v8/alpine" build_name: manywheel-py3_14t-cuda-aarch64-13_0 build_environment: linux-aarch64-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' timeout-minutes: 420 secrets: github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/generated-linux-binary-manywheel-main.yml b/.github/workflows/generated-linux-binary-manywheel-main.yml index a33d84c057cc8..96b9f9f739f72 100644 --- a/.github/workflows/generated-linux-binary-manywheel-main.yml +++ b/.github/workflows/generated-linux-binary-manywheel-main.yml @@ -60,7 +60,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda12_8-test: # Testing diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index 1fa68ad32f81b..0f87f97df694d 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -127,7 +127,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_10-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_10-cuda12_6-test: # Testing @@ -193,7 +193,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_10-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; 
platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_10-cuda12_8-test: # Testing @@ -259,7 +259,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_10-cuda13_0 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_10-cuda13_0-test: # Testing @@ -719,7 +719,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_11-cuda12_6 
build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_11-cuda12_6-test: # Testing @@ -785,7 +785,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_11-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_11-cuda12_8-test: # Testing @@ -851,7 +851,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_11-cuda13_0 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | 
nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_11-cuda13_0-test: # Testing @@ -1311,7 +1311,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda12_6-test: # Testing @@ -1377,7 +1377,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 
'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda12_8-test: # Testing @@ -1443,7 +1443,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda13_0 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; 
platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda13_0-test: # Testing @@ -1903,7 +1903,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system 
== 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13-cuda12_6-test: # Testing @@ -1969,7 +1969,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13-cuda12_8-test: # Testing @@ -2035,7 +2035,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13-cuda13_0 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13-cuda13_0-test: # Testing @@ -2495,7 +2495,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13t-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: 
nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13t-cuda12_6-test: # Testing @@ -2561,7 +2561,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13t-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' secrets: github-token: ${{ 
secrets.GITHUB_TOKEN }} manywheel-py3_13t-cuda12_8-test: # Testing @@ -2627,7 +2627,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13t-cuda13_0 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13t-cuda13_0-test: # Testing @@ -3087,7 +3087,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14-cuda12_6-test: # Testing @@ -3153,7 +3153,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | 
nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14-cuda12_8-test: # Testing @@ -3219,7 +3219,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14-cuda13_0 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14-cuda13_0-test: # Testing @@ -3679,7 +3679,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14t-cuda12_6 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.6.77; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.6.80; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.6.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.0.4; platform_system == 'Linux' | nvidia-curand-cu12==10.3.7.77; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.1.2; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' | nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14t-cuda12_6-test: # Testing @@ -3745,7 +3745,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14t-cuda12_8 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' 
and platform_machine == 'x86_64' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.8.93; platform_system == 'Linux' | nvidia-cuda-runtime-cu12==12.8.90; platform_system == 'Linux' | nvidia-cuda-cupti-cu12==12.8.90; platform_system == 'Linux' | nvidia-cudnn-cu12==9.10.2.21; platform_system == 'Linux' | nvidia-cublas-cu12==12.8.4.1; platform_system == 'Linux' | nvidia-cufft-cu12==11.3.3.83; platform_system == 'Linux' | nvidia-curand-cu12==10.3.9.90; platform_system == 'Linux' | nvidia-cusolver-cu12==11.7.3.90; platform_system == 'Linux' | nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' | nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' | nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' | nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' | nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' | nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' | nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_14t-cuda12_8-test: # Testing @@ -3811,7 +3811,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_14t-cuda13_0 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' | nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' | nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' | nvidia-cudnn-cu13==9.13.0.50; platform_system == 'Linux' | nvidia-cublas==13.0.0.19; platform_system == 'Linux' | nvidia-cufft==12.0.0.15; platform_system == 'Linux' | nvidia-curand==10.4.0.35; platform_system == 'Linux' | nvidia-cusolver==12.0.3.29; platform_system == 'Linux' | nvidia-cusparse==12.6.2.49; platform_system == 'Linux' | nvidia-cusparselt-cu13==0.8.0; 
platform_system == 'Linux' | nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' | nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' | nvidia-nvtx==13.0.39; platform_system == 'Linux' | nvidia-nvjitlink==13.0.39; platform_system == 'Linux' | nvidia-cufile==1.15.0.42; platform_system == 'Linux'
    secrets:
      github-token: ${{ secrets.GITHUB_TOKEN }}
  manywheel-py3_14t-cuda13_0-test:  # Testing

From 4fd2a2b2733283130087d90c17f3c16df5c22811 Mon Sep 17 00:00:00 2001
From: Mark Saroufim
Date: Thu, 11 Sep 2025 00:20:31 +0000
Subject: [PATCH 077/693] Add cuda headers automatically for compile_kernel (#162634)

The issue was pointed out before by @ngimel and more recently by @gau-nernst in
https://gau-nernst.github.io/nvrtc-matmul/#missing-cuda-and-c-headers-

The benefit is that we can now `#include` CUDA headers such as `cuda_fp16.h`
in a kernel without the compilation failing.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162634
Approved by: https://github.com/ngimel
---
 test/test_cuda.py    | 33 +++++++++++++++++++++++++++++++++
 torch/cuda/_utils.py |  7 +++++++
 2 files changed, 40 insertions(+)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index 6b4e4c371098c..d53e41b2fb84e 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -7023,6 +7023,39 @@ def _(input_tensor, scalar):
         expected = input_data + scalar_val
         torch.testing.assert_close(result, expected, rtol=1e-5, atol=1e-5)
 
+    @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc")
+    @unittest.skipIf(not TEST_CUDA, "No CUDA")
+    def test_compile_kernel_cuda_headers(self):
+        """Test that kernels can include and use CUDA headers like cuda_fp16.h."""
+        kernel_source = """
+        #include <cuda_fp16.h>
+
+        extern "C"
+        __global__ void half_precision_kernel(__half* output, float input_value, int n) {
+            int idx = blockIdx.x * blockDim.x + threadIdx.x;
+            if (idx < n) {
+                output[idx] = __float2half(input_value);
+            }
+        }
+        """
+
+        from torch.cuda import _compile_kernel
+
+        compiled_kernel = _compile_kernel(kernel_source, "half_precision_kernel")
+
+        n = 100
+        test_value = 3.14159
+        output = torch.zeros(n, device="cuda", dtype=torch.float16)
+
+        compiled_kernel(
+            grid=(1, 1, 1),
+            block=(256, 1, 1),
+            args=[output, test_value, n],
+        )
+
+        expected = torch.full((n,), test_value, device="cuda", dtype=torch.float16)
+        torch.testing.assert_close(output, expected, rtol=1e-3, atol=1e-3)
+
 
 @unittest.skipIf(not TEST_CUDA, "CUDA not available, skipping tests")
 class TestCudaDeviceParametrized(TestCase):
diff --git a/torch/cuda/_utils.py b/torch/cuda/_utils.py
index 5fdcd65ddf7b7..af3927f337286 100644
--- a/torch/cuda/_utils.py
+++ b/torch/cuda/_utils.py
@@ -104,6 +104,13 @@ def check_nvrtc(result: int) -> None:
     options = []
     options.append(f"--gpu-architecture=sm_{compute_capability}".encode())
 
+    # Auto-detect and add CUDA include paths
+    from torch.utils.cpp_extension import include_paths
+
+    cuda_include_paths = include_paths("cuda")
+    for cuda_path in cuda_include_paths:
+        options.append(f"-I{cuda_path}".encode())
+
     # Add custom include directories
     if cuda_include_dirs:
         for directory in cuda_include_dirs:

From da5069f2892a4019e8a34ae241f20dad0e0b2874 Mon Sep 17 00:00:00 2001
From: Edward Yang
Date: Wed, 10 Sep 2025 16:28:59 -0400
Subject: [PATCH 078/693] Don't include cuh header when USE_NVSHMEM is off (#162635)

Signed-off-by: Edward Yang

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162635
Approved by: https://github.com/kwen2501
---
 torch/csrc/distributed/c10d/init.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp
index 0189326683585..a5270354cf61d 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -48,7 +48,10 @@ #include #include #include + +#ifdef USE_NVSHMEM #include +#endif #include #include From 612cdc8f4868a405860ea5f08fb8b4e707327a1e Mon Sep 17 00:00:00 2001 From: dolpm <34420038+dolpm@users.noreply.github.com> Date: Thu, 11 Sep 2025 00:35:53 +0000 Subject: [PATCH 079/693] -ldl for nativert tests (#162643) Fixes #162640 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162643 Approved by: https://github.com/yiming0416, https://github.com/robert-hardwick --- test/cpp/nativert/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt index 91605c0933d2c..524b7a82d960c 100644 --- a/test/cpp/nativert/CMakeLists.txt +++ b/test/cpp/nativert/CMakeLists.txt @@ -63,6 +63,7 @@ target_compile_definitions(test_nativert PRIVATE USE_GTEST) set(NATIVERT_TEST_DEPENDENCIES torch gtest_main) +target_link_libraries(test_nativert PRIVATE ${CMAKE_DL_LIBS}) target_link_libraries(test_nativert PRIVATE ${NATIVERT_TEST_DEPENDENCIES}) target_link_libraries(test_nativert PRIVATE fmt::fmt-header-only) target_include_directories(test_nativert PRIVATE ${ATen_CPU_INCLUDE}) From 435c18fb4aa7ef6f33c3329fcb8f93f1dc2a89c6 Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Wed, 10 Sep 2025 14:54:45 -0700 Subject: [PATCH 080/693] [DTensor] add op support for aten.unbind.int (#162560) As titled. It seems unbind returns views of the original tensor. E.g. see https://stackoverflow.com/questions/78910951/does-unbind-return-the-views-of-tensors-in-pytorch So we error out when `shard_dim == unbind_dim`. This is similar to why we error out in view ops. https://github.com/pytorch/pytorch/blob/main/torch/distributed/tensor/_ops/_view_ops.py#L544-L546 This PR also refactors some other tensor ops code, by creating two utils function `shift_shard_dims_after_insert`, `shift_shard_dims_after_remove`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162560 Approved by: https://github.com/zpcore --- test/distributed/tensor/test_tensor_ops.py | 32 +++++++ torch/distributed/tensor/_ops/_tensor_ops.py | 92 ++++++++++++-------- torch/distributed/tensor/_ops/utils.py | 24 +++++ 3 files changed, 111 insertions(+), 37 deletions(-) diff --git a/test/distributed/tensor/test_tensor_ops.py b/test/distributed/tensor/test_tensor_ops.py index b41e8f53b1369..eaa1969068c1f 100644 --- a/test/distributed/tensor/test_tensor_ops.py +++ b/test/distributed/tensor/test_tensor_ops.py @@ -1,6 +1,8 @@ # Copyright (c) Meta Platforms, Inc. 
and affiliates # Owner(s): ["oncall: distributed"] +import itertools + import torch from torch.distributed.tensor import ( DeviceMesh, @@ -789,6 +791,36 @@ def _test_split_on_partial(self, reduce_op: str, split_size: int, split_dim: int dim=split_dim, ) + @with_comms + def test_unbind(self): + device_mesh = self.build_device_mesh() + shard_dims = [0, 1] + unbind_dims = [0, 1] + local_tensor = torch.randn(4, 8, requires_grad=True) + for shard_dim, unbind_dim in itertools.product(shard_dims, unbind_dims): + dist_tensor = distribute_tensor( + local_tensor, device_mesh, (Shard(shard_dim),) + ) + + if shard_dim == unbind_dim: + with self.assertRaisesRegex( + RuntimeError, "Sharding propagation failed" + ): + dist_tensor.unbind(dim=unbind_dim) + else: + unbinded_dist_tensors = dist_tensor.unbind(dim=unbind_dim) + new_shard_dim = shard_dim if shard_dim < unbind_dim else shard_dim - 1 + self.assertTrue( + all( + elem.placements[0].is_shard(dim=new_shard_dim) + for elem in unbinded_dist_tensors + ) + ) + for x, y in zip( + unbinded_dist_tensors, local_tensor.unbind(dim=unbind_dim) + ): + self.assertEqual(x.full_tensor(), y) + if __name__ == "__main__": run_tests() diff --git a/torch/distributed/tensor/_ops/_tensor_ops.py b/torch/distributed/tensor/_ops/_tensor_ops.py index 0e62b817477c7..a94c68c58739d 100644 --- a/torch/distributed/tensor/_ops/_tensor_ops.py +++ b/torch/distributed/tensor/_ops/_tensor_ops.py @@ -27,6 +27,8 @@ normalize_dim, register_op_strategy, register_prop_rule, + shift_shard_dims_after_insert, + shift_shard_dims_after_remove, ) from torch.distributed.tensor.placement_types import ( Partial, @@ -309,16 +311,11 @@ def select_int_strategy(op_schema: OpSchema) -> StrategyType: output_specs = input_specs if input_specs.is_sharded(): # handle cases with sharded_dim != selected_dim - output_spec_placements = [] - for placement in input_specs.placements: - if placement.is_shard(): - shard_dim = cast(Shard, placement).dim - if shard_dim > selected_dim: - shard_dim -= 1 - placement = Shard(dim=shard_dim) - output_spec_placements.append(placement) + output_placements = shift_shard_dims_after_remove( + input_specs.placements, selected_dim + ) output_specs = DTensorSpec( - arg_spec.mesh, placements=tuple(output_spec_placements) + arg_spec.mesh, placements=tuple(output_placements) ) select_strategy.strategies.append( @@ -343,19 +340,10 @@ def select_backward_strategy(op_schema: OpSchema) -> OpStrategy: output_strategies: list[OpSpec] = [] for placement_strategy in input_strategy.strategies: input_spec = placement_strategy.output_spec - output_spec_placements: list[Placement] = [] - for placement in input_spec.placements: - if isinstance(placement, Shard): - shard_dim = placement.dim - if shard_dim >= dim: - # NOTE: shard_dim is guaranteed to exist because - # grad_input has one more dim than grad_output - output_spec_placements.append(Shard(shard_dim + 1)) - else: - output_spec_placements.append(Shard(shard_dim)) - else: - output_spec_placements.append(placement) - output_specs = DTensorSpec(input_spec.mesh, tuple(output_spec_placements)) + # NOTE: shard_dim is guaranteed to exist because + # grad_input has one more dim than grad_output + output_placements = shift_shard_dims_after_insert(input_spec.placements, dim) + output_specs = DTensorSpec(input_spec.mesh, tuple(output_placements)) output_strategies.append( OpSpec(output_specs=output_specs, input_specs=(input_spec,)) ) @@ -724,20 +712,6 @@ def merge_placement( return follow_placements -def normalize_shard_for_stack( - placements: 
Sequence[Placement], insert_dim: int = 0 -) -> Sequence[Placement]: - # stack op would "insert" new dim, so all sharded dim >= the inserted dim need to - # be normalized with the new Shard placement - normalized_placements: list[Placement] = [] - for placement in placements: - if isinstance(placement, Shard) and placement.dim >= insert_dim: - normalized_placements.append(Shard(placement.dim + 1)) - else: - normalized_placements.append(placement) - return normalized_placements - - @register_op_strategy(aten.stack.default, RuntimeSchemaInfo(1, needs_pytree=True)) def stack_strategy(op_schema: OpSchema) -> StrategyType: args_schema = op_schema.args_schema @@ -764,7 +738,9 @@ def stack_strategy(op_schema: OpSchema) -> StrategyType: for _ in range(len(input_tuple_strategy.children)) ) - follow_placements = normalize_shard_for_stack(follow_placements, dim) + # stack op would "insert" new dim, so all sharded dim >= the inserted dim need to + # be normalized with the new Shard placement + follow_placements = shift_shard_dims_after_insert(follow_placements, dim) for strategy in input_tuple_strategy.children: assert isinstance(strategy, OpStrategy) @@ -1167,3 +1143,45 @@ def size_split(N, i) -> list: ) return OpStrategy(all_strategies) + + +# TODO: fix remaining failures in xfail("unbind") in test_dtensor_ops.py +# and remove this xfail item +@register_op_strategy(aten.unbind.int, schema_info=RuntimeSchemaInfo(1)) +def gen_unbind_strategy(op_schema: OpSchema) -> StrategyType: + """Forward all shardings except the unbind dimension.""" + input_strategy = op_schema.args_schema[0] + assert isinstance(input_strategy, OpStrategy) + input_ndim = input_strategy.ndim + input_shape = input_strategy.shape + unbind_dim = ( + cast(int, op_schema.args_schema[1]) if len(op_schema.args_schema) > 1 else 0 + ) + unbind_dim = normalize_dim(unbind_dim, input_ndim) + + mesh = input_strategy.mesh + unbind_strategy = OpStrategy([]) + for arg_strategy in input_strategy.strategies: + arg_spec = arg_strategy.output_spec + if is_tensor_dim_sharded(arg_spec, dim=unbind_dim): + raise RuntimeError( + f"Attempted to unbind along the sharded dimension {unbind_dim}. 
", + "It cannot be performed without redistribution, which is disallowed " + "by the current operator.", + ) + # only add the strategy if the unbind dim is not sharded + output_placements = shift_shard_dims_after_remove( + arg_spec.placements, unbind_dim + ) + output_specs = tuple( + DTensorSpec(mesh, tuple(output_placements)) + for _ in range(input_shape[unbind_dim]) + ) + unbind_strategy.strategies.append( + OpSpec( + output_specs=output_specs, + input_specs=(arg_spec,), + redistribute_cost=[[0.0] * len(input_strategy.strategies)], + ) + ) + return unbind_strategy diff --git a/torch/distributed/tensor/_ops/utils.py b/torch/distributed/tensor/_ops/utils.py index fb6f8a8ba8108..2d05b62aef44f 100644 --- a/torch/distributed/tensor/_ops/utils.py +++ b/torch/distributed/tensor/_ops/utils.py @@ -370,3 +370,27 @@ def expand_to_full_mesh_op_strategy( ) all_strategies.append(strategy) return OpStrategy(all_strategies) + + +def shift_shard_dims_after_insert( + placements: Sequence[Placement], insert_dim: int = 0 +) -> Sequence[Placement]: + normalized_placements: list[Placement] = [] + for placement in placements: + if isinstance(placement, Shard) and placement.dim >= insert_dim: + normalized_placements.append(Shard(placement.dim + 1)) + else: + normalized_placements.append(placement) + return normalized_placements + + +def shift_shard_dims_after_remove( + placements: Sequence[Placement], remove_dim: int = 0 +) -> Sequence[Placement]: + normalized_placements: list[Placement] = [] + for placement in placements: + if isinstance(placement, Shard) and placement.dim > remove_dim: + normalized_placements.append(Shard(placement.dim - 1)) + else: + normalized_placements.append(placement) + return normalized_placements From f17c5e0789d454bc451c0b04494a18d2c9e3554f Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 10 Sep 2025 20:06:50 +0000 Subject: [PATCH 081/693] [inductor] Add shape for store_output in matmul templates (#162426) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162426 Approved by: https://github.com/eellison --- torch/_inductor/fx_passes/b2b_gemm.py | 4 ++-- torch/_inductor/kernel/bmm.py | 2 +- torch/_inductor/kernel/conv.py | 4 ++-- .../kernel/flex/templates/flex_decode.py.jinja | 2 +- torch/_inductor/kernel/mm.py | 8 ++++---- torch/_inductor/kernel/mm_grouped.py | 4 ++-- torch/_inductor/kernel/mm_plus_mm.py | 2 +- torch/_inductor/select_algorithm.py | 10 ++++++++-- 8 files changed, 21 insertions(+), 15 deletions(-) diff --git a/torch/_inductor/fx_passes/b2b_gemm.py b/torch/_inductor/fx_passes/b2b_gemm.py index ff434ccba0952..a87c86fe9e52f 100644 --- a/torch/_inductor/fx_passes/b2b_gemm.py +++ b/torch/_inductor/fx_passes/b2b_gemm.py @@ -123,7 +123,7 @@ def b2b_gemm_grid(M, P, meta, *, cdiv): idx_p = offs_p[None, :] out_mask = (idx_m < M) & (idx_p < P) - {{store_output(("idx_m", "idx_p"), "acc", "out_mask")}} + {{store_output(("idx_m", "idx_p"), "acc", "out_mask", val_shape=("BLOCK_SIZE_M", "BLOCK_SIZE_P"))}} """, ) @@ -205,7 +205,7 @@ def b2b_gemm_grid(M, P, meta, *, cdiv): idx_p = offs_p[None, :] out_mask = (idx_m < M) & (idx_p < P) - {{store_output(("idx_m", "idx_p"), "acc", "out_mask")}} + {{store_output(("idx_m", "idx_p"), "acc", "out_mask", val_shape=("BLOCK_SIZE_M", "BLOCK_SIZE_P"))}} """, ) diff --git a/torch/_inductor/kernel/bmm.py b/torch/_inductor/kernel/bmm.py index 42cd742fa2928..20d101b951c09 100644 --- a/torch/_inductor/kernel/bmm.py +++ b/torch/_inductor/kernel/bmm.py @@ -108,7 +108,7 @@ def bmm_grid(b, m, n, meta, *, cdiv): mask = (idx_m < M) & (idx_n 
< N) # inductor generates a suffix - {{store_output(("idx_q", "idx_m", "idx_n"), "acc", "mask")}} + {{store_output(("idx_q", "idx_m", "idx_n"), "acc", "mask", val_shape=("BLOCK_M", "BLOCK_N"))}} """, cache_codegen_enabled_for_template=True, ) diff --git a/torch/_inductor/kernel/conv.py b/torch/_inductor/kernel/conv.py index c929299cc7951..d6e802d00aaad 100644 --- a/torch/_inductor/kernel/conv.py +++ b/torch/_inductor/kernel/conv.py @@ -180,7 +180,7 @@ def conv3d_grid(n, c, d, h, w, meta, *, cdiv): idx_w = idx_y_w[:, None] # inductor generates a suffix - {{store_output(("idx_n", "idx_c", "idx_h", "idx_w"), "acc", "mask")}} + {{store_output(("idx_n", "idx_c", "idx_h", "idx_w"), "acc", "mask", val_shape=("BLOCK_M", "BLOCK_N"))}} """, ) @@ -318,7 +318,7 @@ def conv3d_grid(n, c, d, h, w, meta, *, cdiv): idx_w = idx_y_w[:, None] # inductor generates a suffix - {{store_output(("idx_n", "idx_c", "idx_d", "idx_h", "idx_w"), "acc", "mask")}} + {{store_output(("idx_n", "idx_c", "idx_d", "idx_h", "idx_w"), "acc", "mask", val_shape=("BLOCK_M", "BLOCK_N"))}} """, ) diff --git a/torch/_inductor/kernel/flex/templates/flex_decode.py.jinja b/torch/_inductor/kernel/flex/templates/flex_decode.py.jinja index 57adc1cd69d63..e5f0e118c5631 100644 --- a/torch/_inductor/kernel/flex/templates/flex_decode.py.jinja +++ b/torch/_inductor/kernel/flex/templates/flex_decode.py.jinja @@ -239,4 +239,4 @@ mask = (idx_m < Q_LEN) & (idx_d < V_HEAD_DIM) acc = acc.reshape(G, BLOCK_M_PER_HQ, V_HEAD_DIM) - {{store_output(("idx_z", "idx_t", "idx_hq", "idx_m", "idx_d"), "acc", "mask")}} + {{store_output(("idx_z", "idx_t", "idx_hq", "idx_m", "idx_d"), "acc", "mask", val_shape=("GQA_SHARED_HEADS", "BLOCK_M_PER_HQ", "V_HEAD_DIM"))}} diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 3f54854827c6d..202c9bd8638e3 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -134,7 +134,7 @@ mask = (idx_m < M) & (idx_n < N) # inductor generates a suffix - {{store_output(("idx_m", "idx_n"), "acc", "mask")}} + {{store_output(("idx_m", "idx_n"), "acc", "mask", val_shape=("BLOCK_M", "BLOCK_N"))}} """ if (torch.version.hip is None) or triton_version >= "3.3.0" # FIXME: To get around rocm failures like https://github.com/pytorch/pytorch/actions/runs/13123783322/job/36617154943 @@ -209,7 +209,7 @@ mask = (idx_m < M) & (idx_n < N) # inductor generates a suffix - {{store_output(("idx_m", "idx_n"), "acc", "mask")}} + {{store_output(("idx_m", "idx_n"), "acc", "mask", val_shape=("BLOCK_M", "BLOCK_N"))}} """ ), cache_codegen_enabled_for_template=True, @@ -344,7 +344,7 @@ mask = (idx_m < M) & (idx_n < N) # inductor generates a suffix - {{store_output(("idx_m", "idx_n"), "acc", "mask", indent_width=12)}} + {{store_output(("idx_m", "idx_n"), "acc", "mask", indent_width=12, val_shape=("BLOCK_M", "BLOCK_N"))}} acc = tl.zeros((BLOCK_M, BLOCK_N), dtype=ACC_TYPE) """, @@ -535,7 +535,7 @@ def apply_scaling( idx_n = offs_cn[None, :] mask = (idx_m < M) & (idx_n < N) # inductor generates a suffix - {{store_output(("idx_m", "idx_n"), "accumulator", "mask", indent_width=12)}} + {{store_output(("idx_m", "idx_n"), "accumulator", "mask", indent_width=12, val_shape=("BLOCK_M", "BLOCK_N"))}} accumulator = tl.zeros((BLOCK_M, BLOCK_N), dtype=tl.float32) """ diff --git a/torch/_inductor/kernel/mm_grouped.py b/torch/_inductor/kernel/mm_grouped.py index 6508146fa49af..c25da722a7b4a 100644 --- a/torch/_inductor/kernel/mm_grouped.py +++ b/torch/_inductor/kernel/mm_grouped.py @@ -389,9 +389,9 @@ def early_config_prune(g, m, configs, 
named_args): {%- endif %} mask = (offs_am[:, None] < m_size) & (offs_bn[None, :] < n_size) {%- if M_IS_VARYING or N_IS_VARYING %} - {{store_output(("idx_m", "idx_n"), "c", "mask", indent_width=16)}} + {{store_output(("idx_m", "idx_n"), "c", "mask", indent_width=16, val_shape=("BLOCK_M", "BLOCK_N"))}} {%- else %} - {{store_output(("g", "idx_m", "idx_n"), "c", "mask", indent_width=16)}} + {{store_output(("g", "idx_m", "idx_n"), "c", "mask", indent_width=16, val_shape=("BLOCK_M", "BLOCK_N"))}} {%- endif %} tidx += NUM_SMS diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index cf169e81067da..df94e3e5cd7bb 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -118,7 +118,7 @@ mask = (idx_m < M) & (idx_n < N) # inductor generates a suffix - {{store_output(("idx_m", "idx_n"), "acc", "mask")}} + {{store_output(("idx_m", "idx_n"), "acc", "mask", val_shape=("BLOCK_M", "BLOCK_N"))}} """, cache_codegen_enabled_for_template=True, ) diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index ac8daee16417a..fad17cc91726b 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -366,7 +366,7 @@ class TritonTemplateKernel(TritonKernel): def __init__( self, kernel_name, - input_nodes, + input_nodes: tuple[ir.IRNode], output_node, defines, num_stages, @@ -1079,7 +1079,13 @@ def store_output( self.input_nodes[len(self.input_nodes) - self.suffix_args :], ): input_node.freeze_layout() - epilogue_args.append(input_node.make_loader()(index_symbols)) + epilogue_arg = V.kernel.cse.generate( + self.compute, + input_node.make_loader()(index_symbols), + dtype=acc_dtype, + shape=input_node.get_size(), + ) + epilogue_args.append(epilogue_arg) # We update frozen_layouts_cnt in order to replay this function on a cache hit. 
self.frozen_layouts_cnt += 1 From f654cff5663c1972172f150f529a587fc3c0d2c1 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 10 Sep 2025 20:06:51 +0000 Subject: [PATCH 082/693] [inductor] Add shape to load_input in matmul templates (#162513) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162513 Approved by: https://github.com/eellison ghstack dependencies: #162426 --- torch/_inductor/codegen/triton.py | 21 +++++++++++++++------ torch/_inductor/kernel/mm.py | 12 ++++++++---- torch/_inductor/select_algorithm.py | 15 ++++++++------- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index bf22d5ec587a0..7fb6d71cd3620 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -2046,7 +2046,7 @@ def indexing( self, index: sympy.Expr, *, - copy_shape=None, + copy_shape: Optional[Union[str, tuple[str]]] = None, dense_indexing=False, override_mask=None, block_ptr=False, @@ -2333,9 +2333,18 @@ def match_block_expr() -> Optional[BlockDescriptorOptions]: expand_str = None expand_shape: BlockShapeType = None index_str = self.index_to_str(index) + + def _get_expand_str(): + if copy_shape: + if isinstance(copy_shape, str): + return f"{copy_shape}.shape", None + else: + return "[" + ", ".join(str(c) for c in copy_shape) + "]", copy_shape + else: + return self.dense_size_str(), tuple(self.dense_size_list()) + if isinstance(index, sympy.Integer): - expand_str = f"{copy_shape}.shape" if copy_shape else self.dense_size_str() - expand_shape = None if copy_shape else tuple(self.dense_size_list()) + expand_str, expand_shape = _get_expand_str() index_str = f"tl.full({expand_str}, {index_str}, tl.int32)" if self.fixed_config and not self._has_constant_xmask(): mask_vars = OrderedSet(["xmask"]) @@ -2353,12 +2362,12 @@ def match_block_expr() -> Optional[BlockDescriptorOptions]: ) if need_dense and not have_dense: - expand_str = f"{copy_shape}.shape" if copy_shape else self.dense_size_str() - expand_shape = None if copy_shape else tuple(self.dense_size_list()) + expand_str, expand_shape = _get_expand_str() index_str = f"tl.broadcast_to({index_str}, {expand_str})" mask_vars = dense_mask_vars elif not have_loop_vars and copy_shape: - index_str = f"tl.broadcast_to({index_str}, {copy_shape}.shape)" + expand_shape_str, expand_shape = _get_expand_str() + index_str = f"tl.broadcast_to({index_str}, {expand_shape_str})" mask_vars = dense_mask_vars if expand_shape is None: diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 202c9bd8638e3..24c5c23218ba6 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -114,11 +114,13 @@ idx_m = offs_a_m[:, None] idx_n = a_k_idx_vals - {{load_input("A", "a", ("idx_m", "idx_n"), mask=None if EVEN_K else "a_mask", indent_width=8)}} + {{load_input("A", "a", ("idx_m", "idx_n"), mask=None if EVEN_K else "a_mask", + indent_width=8, index_shape=("BLOCK_M", "BLOCK_K"))}} idx_m = b_k_idx_vals idx_n = offs_b_n[None, :] - {{load_input("B", "b", ("idx_m", "idx_n"), mask=None if EVEN_K else "b_mask", indent_width=8)}} + {{load_input("B", "b", ("idx_m", "idx_n"), mask=None if EVEN_K else "b_mask", + indent_width=8, index_shape=("BLOCK_K", "BLOCK_N"))}} {% if USE_FAST_ACCUM %} acc = tl.dot(a, b, acc, allow_tf32=ALLOW_TF32, out_dtype=ACC_TYPE) @@ -190,11 +192,13 @@ idx_m = offs_a_m[:, None] idx_n = a_k_idx_vals - {{load_input("A", "a", ("idx_m", "idx_n"), mask=None if EVEN_K else "a_mask", indent_width=8)}} + 
{{load_input("A", "a", ("idx_m", "idx_n"), mask=None if EVEN_K else "a_mask", + indent_width=8, index_shape=("BLOCK_M", "BLOCK_K"))}} idx_m = b_k_idx_vals idx_n = offs_b_n[None, :] - {{load_input("B", "b", ("idx_m", "idx_n"), mask=None if EVEN_K else "b_mask", indent_width=8)}} + {{load_input("B", "b", ("idx_m", "idx_n"), mask=None if EVEN_K else "b_mask", + indent_width=8, index_shape=("BLOCK_K", "BLOCK_N"))}} {% if USE_FAST_ACCUM %} acc = tl.dot(a, b, acc, allow_tf32=ALLOW_TF32, out_dtype=ACC_TYPE) {% else %} diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index fad17cc91726b..eff89a21223c7 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -254,7 +254,7 @@ def finalize_all(self) -> str: class SubgraphInfo: body: IndentedBuffer template_mask: Optional[str] = None - template_out: Optional[str] = None + template_out_shape: Optional[Union[str, tuple[str]]] = None compute: IndentedBuffer = dataclasses.field(default_factory=IndentedBuffer) indexing_code: IndentedBuffer = dataclasses.field(default_factory=IndentedBuffer) loads: IndentedBuffer = dataclasses.field(default_factory=IndentedBuffer) @@ -445,7 +445,7 @@ def __init__( self.loads: IndentedBuffer = FakeIndentedBuffer() self.stores: IndentedBuffer = FakeIndentedBuffer() self.template_mask: Optional[str] = None - self.template_out: Optional[str] = None + self.template_out_shape: Optional[Union[str, tuple[str]]] = None self.ops_handler: Optional[V.WrapperHandler] = None # type: ignore[name-defined] # When caching is enabled, the generated code is not dependent on the input nodes names, or @@ -841,6 +841,7 @@ def load_input( mask: Optional[str] = None, other: Optional[Union[float, int]] = 0.0, indent_width: int = 4, + index_shape: Optional[tuple[str]] = None, ): """Loads an input and applies any necessary preprocessing or masking. @@ -918,7 +919,7 @@ def load_input( # We are using "None" for clarity in output code, but # we could alternatively emit `xmask = tl.full([xindex.shape], True, tl.int1)` self.template_mask = mask if mask is not None else "None" - self.template_out = "xindex" + self.template_out_shape = index_shape if index_shape else "xindex" self.template_indices = indices self.named_input_nodes[input_name].data.freeze_layout() self.cse.invalidate(OrderedSet()) @@ -981,7 +982,7 @@ def store( else: out_indexing = self.indexing( output_index, - copy_shape=self.template_out, + copy_shape=self.template_out_shape, override_mask=self.template_mask, ) from .codegen.triton import IndexingOptions @@ -1020,7 +1021,7 @@ def store_output( val: str, mask: Optional[str] = None, indent_width: int = 4, - val_shape: Optional[list[str]] = None, + val_shape: Optional[tuple[str]] = None, ): """Stores the final output and appends any epilogue fusions if the buffer hasn't been optimized away. 
@@ -1059,7 +1060,7 @@ def store_output( "xindex" ) self.template_mask = mask - self.template_out = val + self.template_out_shape = val_shape if val_shape else val self.template_indices = indices output_index = self.output_node.get_layout().make_indexer()(index_symbols) output_index = self.rename_indexing(output_index) @@ -1209,7 +1210,7 @@ def indexing( dense_indexing=False, # We pass template_out as the shape to broadcast the indexing to as # the mask might be broadcast to the output shape - copy_shape=self.template_out, + copy_shape=self.template_out_shape, override_mask=self.template_mask, block_ptr=block_ptr, tma_compatibility_checker=tma_compatibility_checker, From 6944d4b63974fe710ef388798ea1c9c9bec18575 Mon Sep 17 00:00:00 2001 From: Jagadish Krishnamoorthy Date: Thu, 11 Sep 2025 03:34:07 +0000 Subject: [PATCH 083/693] [ROCm] rocblas Aten GEMM overload for FP32 output from FP16/BF16 inputs (#162600) Fix ROCm GEMM helper to set output type (C/D) based on C_Dtype template parameter. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162600 Approved by: https://github.com/jeffdaily, https://github.com/pruthvistony --- aten/src/ATen/cuda/CUDABlas.cpp | 12 ++++++++---- test/test_matmul_cuda.py | 6 ++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp index 0d319ea593840..a81d34df4d64f 100644 --- a/aten/src/ATen/cuda/CUDABlas.cpp +++ b/aten/src/ATen/cuda/CUDABlas.cpp @@ -644,6 +644,8 @@ inline void bgemm_internal_cublas_half_helper(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYP void * beta_ptr = &fbeta; #ifdef USE_ROCM int flag = 0; + rocblas_datatype c_type = std::is_same::value ? rocblas_datatype_f32_r : rocblas_datatype_f16_r; + rocblas_datatype d_type = c_type; #if USE_GEMM_FLAGS_FP16_ALT_IMPL flag = at::ROCmBackwardPassGuard::is_backward_pass() ? rocblas_gemm_flags_fp16_alt_impl : 0; #endif @@ -652,8 +654,8 @@ inline void bgemm_internal_cublas_half_helper(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYP hipOperationToRocOperation(opb), (int)m, (int)n, (int)k, (void*)alpha_ptr, a, rocblas_datatype_f16_r, (int)lda, stridea, b, rocblas_datatype_f16_r, (int)ldb, strideb, - (void*)beta_ptr, c, rocblas_datatype_f16_r, (int)ldc, stridec, - c, rocblas_datatype_f16_r, (int)ldc, stridec, + (void*)beta_ptr, c, c_type, (int)ldc, stridec, + c, d_type, (int)ldc, stridec, (int) num_batches, rocblas_datatype_f32_r, rocblas_gemm_algo_standard, 0, flag))); #else @@ -1096,6 +1098,8 @@ inline void gemm_internal_cublas_half_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE( GEMM_CHECK_ARGVALUES(at::Half); #ifdef USE_ROCM int flag = 0; + rocblas_datatype c_type = std::is_same::value ? rocblas_datatype_f32_r : rocblas_datatype_f16_r; + rocblas_datatype d_type = c_type; #if USE_GEMM_FLAGS_FP16_ALT_IMPL flag = at::ROCmBackwardPassGuard::is_backward_pass() ? 
rocblas_gemm_flags_fp16_alt_impl : 0; #endif @@ -1115,10 +1119,10 @@ inline void gemm_internal_cublas_half_helper(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE( ldb, beta_ptr, c, - rocblas_datatype_f16_r, + c_type, ldc, c, - rocblas_datatype_f16_r, + d_type, ldc, rocblas_datatype_f32_r, rocblas_gemm_algo_standard, diff --git a/test/test_matmul_cuda.py b/test/test_matmul_cuda.py index 5d76ac383e8d8..ea73ccfd5b372 100644 --- a/test/test_matmul_cuda.py +++ b/test/test_matmul_cuda.py @@ -624,8 +624,7 @@ def test_grouped_gemm_compiled(self, op, a_row_major, b_row_major, max_autotune) @parametrize("N", [1, 32, 64]) @parametrize("K", [1, 32, 64]) @parametrize("batch_size", [None, 1, 16]) - # TODO: enable rocblas path on ROCm - @parametrize("backend", ["cublaslt"] if torch.version.hip else ["cublas", "cublaslt"]) + @parametrize("backend", ["cublas", "cublaslt"]) def test_mm_bmm_dtype_overload(self, input_dtype, M, N, K, batch_size, backend): device = "cuda" dtype = input_dtype @@ -679,8 +678,7 @@ def create_inputs(B=None): @parametrize("N", [1, 32, 64]) @parametrize("K", [1, 32, 64]) @parametrize("batch_size", [None, 1, 32]) - # TODO: enable rocblas path on ROCm - @parametrize("backend", ["cublaslt"] if torch.version.hip else ["cublas", "cublaslt"]) + @parametrize("backend", ["cublas", "cublaslt"]) def test_addmm_baddmm_dtype_overload(self, input_dtype, M, N, K, batch_size, backend): device = "cuda" dtype = input_dtype From 07d25316723cebcc636508838c8d322059a0294a Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Thu, 11 Sep 2025 04:56:01 +0000 Subject: [PATCH 084/693] [vllm hash update] update the pinned vllm hash (#162551) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned vllm hash. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162551 Approved by: https://github.com/pytorchbot --- .github/ci_commit_pins/vllm.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ci_commit_pins/vllm.txt b/.github/ci_commit_pins/vllm.txt index c9c4265b2f37f..e4ac57f1eb501 100644 --- a/.github/ci_commit_pins/vllm.txt +++ b/.github/ci_commit_pins/vllm.txt @@ -1 +1 @@ -e10fef08838612b4560e9c72e5cb1414a5edfa13 +cc99baf14dacc2497d0c5ed84e076ef2c37f6a4d From 12e993f5335fb1f982e0774fd2c8cbbb648d9a07 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 11 Sep 2025 05:52:46 +0000 Subject: [PATCH 085/693] compile_kernel large shared memory fix (#162647) Alternate solution to https://github.com/pytorch/pytorch/pull/162328 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162647 Approved by: https://github.com/eqy --- test/test_cuda.py | 64 ++++++++++++++++++++++++++++++++++++ torch/cuda/_utils.py | 78 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 135 insertions(+), 7 deletions(-) diff --git a/test/test_cuda.py b/test/test_cuda.py index d53e41b2fb84e..115952e9ae800 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -6844,6 +6844,70 @@ def test_compile_kernel(self): with self.assertRaises(RuntimeError): _compile_kernel(invalid_kernel_source, "invalid_kernel") + @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") + @unittest.skipIf(not TEST_CUDA, "No CUDA") + def test_compile_kernel_large_shared_memory(self): + kernel_source = """ + __global__ void large_shared_memory_kernel(const float* input, float* output, int n) { + extern __shared__ float shared_data[]; + + int tid = threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Load data into shared memory + if (idx < n) { + shared_data[tid] = input[idx]; + } else { + shared_data[tid] = 0.0f; + } + __syncthreads(); + + // Perform reduction in shared memory + for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) { + if (tid < stride) { + shared_data[tid] += shared_data[tid + stride]; + } + __syncthreads(); + } + + // Write result + if (tid == 0) { + output[blockIdx.x] = shared_data[0]; + } + } + """ + + from torch.cuda import _compile_kernel, get_device_properties + + kernel = _compile_kernel(kernel_source, "large_shared_memory_kernel") + + threads_per_block = 1024 # 1024 threads * 4 bytes = 4KB, but we'll request 64KB + shared_mem_size = 64 * 1024 # 64KB + + kernel.set_shared_memory_config(shared_mem_size) + + N = 4096 + input_data = torch.ones(N, device="cuda", dtype=torch.float32) + output_data = torch.zeros(4, device="cuda", dtype=torch.float32) # 4 blocks + + kernel( + grid=(4, 1, 1), + block=(threads_per_block, 1, 1), + args=[input_data, output_data, N], + shared_mem=shared_mem_size, + ) + + # Each block should sum 1024 ones = 1024 + expected = torch.full((4,), 1024.0, dtype=torch.float32) + self.assertEqual(output_data.cpu(), expected) + + # Test error handling with more than supported shared memory size + max_smem = get_device_properties().shared_memory_per_block_optin + excessive_shared_mem = max_smem * 2 + + with self.assertRaises(RuntimeError): + kernel.set_shared_memory_config(excessive_shared_mem) + @tf32_on_and_off(0.005) @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") diff --git a/torch/cuda/_utils.py b/torch/cuda/_utils.py index af3927f337286..5854ffcb81c71 100644 --- a/torch/cuda/_utils.py +++ b/torch/cuda/_utils.py @@ -38,6 +38,27 @@ def _get_nvrtc_library() -> 
ctypes.CDLL: return ctypes.CDLL("libnvrtc.so") +def _get_nvrtc_compatible_flags() -> list[str]: + """ + Get NVCC flags that are compatible with NVRTC compilation. + + Returns: + List of NVCC flags that can be safely used with NVRTC. + """ + from torch.utils.cpp_extension import COMMON_NVCC_FLAGS + + nvrtc_unsupported_flags = { + "--expt-relaxed-constexpr", + } + + # Filter out unsupported flags + compatible_flags = [ + flag for flag in COMMON_NVCC_FLAGS if flag not in nvrtc_unsupported_flags + ] + + return compatible_flags + + def _nvrtc_compile( kernel_source: str, kernel_name: str, @@ -121,13 +142,7 @@ def check_nvrtc(result: int) -> None: for option in nvcc_options: options.append(option.encode("utf-8")) - # TODO: Should we refactor flags into a common place? - from torch.utils.cpp_extension import COMMON_NVCC_FLAGS - - # Filter out flags not supported by NVRTC - nvrtc_compatible_flags = [ - flag for flag in COMMON_NVCC_FLAGS if flag != "--expt-relaxed-constexpr" - ] + nvrtc_compatible_flags = _get_nvrtc_compatible_flags() options.extend([flag.encode("utf-8") for flag in nvrtc_compatible_flags]) # Convert options to C array @@ -206,6 +221,7 @@ class _CudaKernel: def __init__(self, func: ctypes.c_void_p, module: ctypes.c_void_p) -> None: self.func = func self.module = module + self._max_shared_mem_bytes = 0 def __call__( self, @@ -273,6 +289,22 @@ def __call__( stream = torch.cuda.current_stream() + # Check if kernel requires large shared memory but hasn't been configured + if shared_mem >= 48 * 1024 and ( + self._max_shared_mem_bytes == 0 or shared_mem > self._max_shared_mem_bytes + ): + configured_msg = ( + "not configured" + if self._max_shared_mem_bytes == 0 + else f"only {self._max_shared_mem_bytes} bytes configured" + ) + raise RuntimeError( + f"Kernel requires {shared_mem} bytes of shared memory (>= 48KB), " + f"but {configured_msg}. " + "Call kernel.set_shared_memory_config(shared_mem) after compilation " + "and before launching the kernel." + ) + _check_cuda( libcuda.cuLaunchKernel( self.func, @@ -289,6 +321,38 @@ def __call__( ) ) + def set_shared_memory_config(self, shared_mem_bytes: int) -> None: + if shared_mem_bytes < 48 * 1024: + # No configuration needed for <= 48KB, just update the value + self._max_shared_mem_bytes = shared_mem_bytes + return + + libcuda = _get_cuda_library() + + # Get device properties to validate against limits + device_props = torch.cuda.get_device_properties() + max_shared_mem = getattr(device_props, "shared_memory_per_block_optin", 49152) + + if shared_mem_bytes > max_shared_mem: + raise RuntimeError( + f"Requested shared memory ({shared_mem_bytes} bytes) exceeds " + f"device limit ({max_shared_mem} bytes). " + "Consider reducing block size or shared memory usage." + ) + + # Set the function attribute once + # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html + cudaFuncAttributeMaxDynamicSharedMemorySize = 8 + _check_cuda( + libcuda.cuFuncSetAttribute( + self.func, + cudaFuncAttributeMaxDynamicSharedMemorySize, + shared_mem_bytes, + ) + ) + + self._max_shared_mem_bytes = shared_mem_bytes + def _cuda_load_module( ptx: Union[str, bytes], kernel_names: Optional[list[str]] = None From 23170dfebcd79751531e4202b472f3ae98bad388 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 11 Sep 2025 05:57:13 +0000 Subject: [PATCH 086/693] Revert "Move inductor jobs 3.9->3.10 (#162323)" This reverts commit 0663bdb12383b9717af49d58aed9d88de0dd0ecc. 
Reverted https://github.com/pytorch/pytorch/pull/162323 on behalf of https://github.com/huydhn due to Not sure what had happened, but some inductor unit tests start failing after this lands ([comment](https://github.com/pytorch/pytorch/pull/162323#issuecomment-3278125192)) --- .ci/docker/build.sh | 3 ++- .github/workflows/inductor-nightly.yml | 4 ++-- .github/workflows/inductor-perf-test-nightly-x86-zen.yml | 6 +++--- .github/workflows/inductor-perf-test-nightly-x86.yml | 6 +++--- .github/workflows/inductor-periodic.yml | 4 ++-- .github/workflows/inductor-unittest.yml | 4 ++-- .github/workflows/inductor.yml | 4 ++-- .github/workflows/operator_benchmark.yml | 6 +++--- .github/workflows/trunk.yml | 4 ++-- .../cpu_inductor_amp_freezing_torchbench_inference.csv | 4 ++-- .../cpu_inductor_freezing_torchbench_inference.csv | 4 ++-- .../cpu_inductor_torchbench_inference.csv | 4 ++-- .../dynamic_cpu_inductor_torchbench_inference.csv | 4 ++-- ..._autotune_inductor_amp_freezing_torchbench_inference.csv | 4 ++-- .../rocm/aot_eager_torchbench_inference.csv | 4 ++-- .../rocm/dynamic_aot_eager_torchbench_inference.csv | 4 ++-- .../rocm/dynamic_inductor_torchbench_inference.csv | 4 ++-- .../rocm/dynamo_eager_torchbench_inference.csv | 4 ++-- .../rocm/inductor_torchbench_inference.csv | 4 ++-- 19 files changed, 41 insertions(+), 40 deletions(-) diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index be85fdcb542d0..89967cef96b12 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -214,7 +214,8 @@ case "$tag" in TRITON=yes ;; pytorch-linux-jammy-py3-gcc11-inductor-benchmarks) - ANACONDA_PYTHON_VERSION=3.10 + # TODO (huydhn): Upgrade this to Python >= 3.10 + ANACONDA_PYTHON_VERSION=3.9 GCC_VERSION=11 VISION=yes KATEX=yes diff --git a/.github/workflows/inductor-nightly.yml b/.github/workflows/inductor-nightly.yml index 78602e05586b7..fe0f102406b6a 100644 --- a/.github/workflows/inductor-nightly.yml +++ b/.github/workflows/inductor-nightly.yml @@ -37,7 +37,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" test-matrix: | @@ -56,7 +56,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: nightly-dynamo-benchmarks-build with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image: ${{ needs.nightly-dynamo-benchmarks-build.outputs.docker-image }} test-matrix: ${{ needs.nightly-dynamo-benchmarks-build.outputs.test-matrix }} timeout-minutes: 720 diff --git a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml index a9a839df61af2..170de752ab875 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml @@ -75,7 +75,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -101,7 +101,7 @@ jobs: needs: inductor-build if: github.event.schedule == '0 7 * * *' with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: 
linux-jammy-py3.9-gcc11-build dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} @@ -118,7 +118,7 @@ jobs: needs: inductor-build if: github.event_name == 'workflow_dispatch' with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} diff --git a/.github/workflows/inductor-perf-test-nightly-x86.yml b/.github/workflows/inductor-perf-test-nightly-x86.yml index 0533184df2e0e..f894b8fdc6e03 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86.yml @@ -80,7 +80,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -107,7 +107,7 @@ jobs: needs: inductor-build if: github.event.schedule == '0 7 * * *' with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} @@ -124,7 +124,7 @@ jobs: needs: inductor-build if: github.event_name == 'workflow_dispatch' with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-freezing-${{ inputs.freezing }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml index e2395087326a2..21d965eaeaada 100644 --- a/.github/workflows/inductor-periodic.yml +++ b/.github/workflows/inductor-periodic.yml @@ -154,7 +154,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" test-matrix: | @@ -200,7 +200,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: periodic-dynamo-benchmarks-cpu-build with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/inductor-unittest.yml b/.github/workflows/inductor-unittest.yml index 
6ab276a57fc4d..2125a8559363b 100644 --- a/.github/workflows/inductor-unittest.yml +++ b/.github/workflows/inductor-unittest.yml @@ -110,7 +110,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" test-matrix: | @@ -127,7 +127,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: inductor-cpu-build with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 2616141c0dc2a..4189d24a7b14f 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -79,7 +79,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" test-matrix: | @@ -101,7 +101,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: inductor-cpu-build with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/operator_benchmark.yml b/.github/workflows/operator_benchmark.yml index dd262d31b8fc2..aaf32c160f0dc 100644 --- a/.github/workflows/operator_benchmark.yml +++ b/.github/workflows/operator_benchmark.yml @@ -29,7 +29,7 @@ jobs: name: opbenchmark-build uses: ./.github/workflows/_linux-build.yml with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -42,7 +42,7 @@ jobs: name: opbenchmark-on-demand-build uses: ./.github/workflows/_linux-build.yml with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -55,7 +55,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: opbenchmark-build with: - build-environment: linux-jammy-py3.10-gcc11-build + build-environment: linux-jammy-py3.9-gcc11-build docker-image: ${{ needs.opbenchmark-build.outputs.docker-image }} test-matrix: ${{ needs.opbenchmark-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 5b1a12812003f..4dd465d70803d 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -240,7 +240,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.10-gcc11 + build-environment: linux-jammy-py3.9-gcc11 docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -255,7 +255,7 @@ jobs: - verify-cachebench-cpu-build - target-determination with: - build-environment: 
linux-jammy-py3.10-gcc11 + build-environment: linux-jammy-py3.9-gcc11 docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv index a4dbaeb7b546d..e68aa2fa5351f 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,3 +doctr_det_predictor,pass,5 -doctr_reco_predictor,pass,1 +doctr_reco_predictor,pass,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv index 885029ba8c56e..aec659fdcd654 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,3 +doctr_det_predictor,pass,5 -doctr_reco_predictor,pass,1 +doctr_reco_predictor,pass,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv index aa7a3161afcc6..4f2eec1493520 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,3 +doctr_det_predictor,pass,5 -doctr_reco_predictor,pass,1 +doctr_reco_predictor,pass,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv index f26dea6f692ef..c8db4d5823203 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv @@ -82,11 +82,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,3 +doctr_det_predictor,pass,5 -doctr_reco_predictor,pass,1 +doctr_reco_predictor,pass,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv index 39149853947c3..f4c9ffddd9974 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,3 +doctr_det_predictor,pass,5 -doctr_reco_predictor,pass,1 +doctr_reco_predictor,pass,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv index bf70642a855ef..6f316b219bb92 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 
-doctr_det_predictor,eager_fail_to_run,3 +doctr_det_predictor,eager_fail_to_run,5 -doctr_reco_predictor,eager_fail_to_run,1 +doctr_reco_predictor,eager_fail_to_run,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv index e019365ccbfdb..4b5138ce9c367 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,3 +doctr_det_predictor,eager_fail_to_run,5 -doctr_reco_predictor,eager_fail_to_run,1 +doctr_reco_predictor,eager_fail_to_run,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv index fed8ebded682c..a3fc7cf192371 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,3 +doctr_det_predictor,eager_fail_to_run,5 -doctr_reco_predictor,eager_fail_to_run,1 +doctr_reco_predictor,eager_fail_to_run,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv index bf70642a855ef..6f316b219bb92 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,3 +doctr_det_predictor,eager_fail_to_run,5 -doctr_reco_predictor,eager_fail_to_run,1 +doctr_reco_predictor,eager_fail_to_run,4 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv index 014e23e41cb31..8ccf95da9659e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,3 +doctr_det_predictor,eager_fail_to_run,5 -doctr_reco_predictor,eager_fail_to_run,1 +doctr_reco_predictor,eager_fail_to_run,4 From 7345454e2ef30df6737e6f0e6e217d174ce0908a Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 11 Sep 2025 06:03:25 +0000 Subject: [PATCH 087/693] compile_kernel: Handle python floats as c double (#162626) This was an open todo in the code and probably a footgun in waiting Pull Request resolved: https://github.com/pytorch/pytorch/pull/162626 Approved by: https://github.com/malfet --- test/test_cuda.py | 41 ++++++++++++++++++++++++++++++++++++++--- torch/cuda/_utils.py | 9 ++++----- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/test/test_cuda.py b/test/test_cuda.py index 115952e9ae800..d6ce00d9e8db4 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -7043,7 +7043,7 @@ def _(a, b): @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel_custom_op_validation(self): kernel_source = """ - __global__ void add_scalar(const float* input, float* output, float scalar, int n) { + __global__ void add_scalar(const float* input, float* 
output, double scalar, int n) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < n) { output[idx] = input[idx] + scalar; @@ -7087,6 +7087,41 @@ def _(input_tensor, scalar): expected = input_data + scalar_val torch.testing.assert_close(result, expected, rtol=1e-5, atol=1e-5) + @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") + @unittest.skipIf(not TEST_CUDA, "No CUDA") + def test_compile_kernel_double_precision(self): + """Test that Python floats are correctly handled as doubles in kernels.""" + kernel_source = """ + __global__ void test_double_precision(double* output, double value, int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) { + output[idx] = value; + } + } + """ + + from torch.cuda import _compile_kernel + + compiled_kernel = _compile_kernel(kernel_source, "test_double_precision") + + # Test with high precision value that would lose precision if cast to float32 + # float32 has 7 digits of precision, so we use a value with 15 digits + high_precision_value = 1.23456789012345 + n = 10 + + output = torch.zeros(n, device="cuda", dtype=torch.float64) + compiled_kernel( + grid=(1, 1, 1), + block=(256, 1, 1), + args=[output, high_precision_value, n], + ) + + # Verify high precision is preserved (would fail with old float32 casting) + expected = torch.full( + (n,), high_precision_value, device="cuda", dtype=torch.float64 + ) + torch.testing.assert_close(output, expected, rtol=1e-14, atol=1e-14) + @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel_cuda_headers(self): @@ -7095,10 +7130,10 @@ def test_compile_kernel_cuda_headers(self): #include extern "C" - __global__ void half_precision_kernel(__half* output, float input_value, int n) { + __global__ void half_precision_kernel(__half* output, double input_value, int n) { int idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < n) { - output[idx] = __float2half(input_value); + output[idx] = __float2half((float)input_value); } } """ diff --git a/torch/cuda/_utils.py b/torch/cuda/_utils.py index 5854ffcb81c71..cc2411f52b7af 100644 --- a/torch/cuda/_utils.py +++ b/torch/cuda/_utils.py @@ -268,12 +268,11 @@ def __call__( c_int = ctypes.c_int(arg) # Store the C int for reference keeping, not in processed_args c_args.append(ctypes.byref(c_int)) - # TODO: Python floats are actually doubles elif isinstance(arg, float): - # Convert floats to C float - c_float = ctypes.c_float(arg) - # Store the C float for reference keeping, not in processed_args - c_args.append(ctypes.byref(c_float)) + # Python floats are doubles - use double by default + c_double = ctypes.c_double(arg) + # Store the C double for reference keeping, not in processed_args + c_args.append(ctypes.byref(c_double)) else: raise TypeError(f"Unsupported argument type: {type(arg)}") From 52d4660ae9b098e2dac27c135a4c67c7d1d9c0af Mon Sep 17 00:00:00 2001 From: Xu Han Date: Thu, 11 Sep 2025 06:22:18 +0000 Subject: [PATCH 088/693] [AOTI] Fix Windows fail to zip opened file. (#162617) Original issue: Image reproducer: ```cmd pytest test\inductor\test_aot_inductor.py -v -k test_weight_on_disk_legacy_cpu ``` Fixed list: 1. `WritableTempFile`'s `__exit__` function auto unlink opened file, when the file was opened, it should raise error. Ignore it on Windows. 2. When open zip file, if the file is opened, it would be failed. Switch to `_wfsopen` with shared access flag, which can open file with shared access. 
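For reference, a minimal sketch of the Windows file-sharing behavior described in item 1 above; the temp-file suffix and payload are placeholders for this illustration only, not taken from the PR:

```python
import os
import tempfile

# Hold the temp file open, the way WritableTempFile does while the zip writer reads it.
tmp = tempfile.NamedTemporaryFile(suffix=".pt", delete=False)
tmp.write(b"\x00" * 16)
tmp.flush()

try:
    # On Windows this raises PermissionError (an OSError) while the handle is still open;
    # on POSIX the unlink succeeds even though the file is open.
    os.unlink(tmp.name)
except OSError:
    if os.name != "nt":
        raise  # only tolerate the failed unlink on Windows, mirroring item 1 above

tmp.close()
if os.path.exists(tmp.name):
    os.unlink(tmp.name)  # clean up the file that Windows refused to unlink earlier
```

The miniz change addresses the same sharing restriction from the C side: opening with `_wfsopen` and a share flag lets the zip reader open a file that another handle already holds open, where the previous `_wfopen_s` call would fail.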
Local test passed: image Pull Request resolved: https://github.com/pytorch/pytorch/pull/162617 Approved by: https://github.com/jansel --- third_party/miniz-3.0.2/miniz.c | 9 ++++++--- torch/_inductor/codecache.py | 9 ++++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/third_party/miniz-3.0.2/miniz.c b/third_party/miniz-3.0.2/miniz.c index bd6b9f8562255..0f0cf1833b6da 100644 --- a/third_party/miniz-3.0.2/miniz.c +++ b/third_party/miniz-3.0.2/miniz.c @@ -3136,6 +3136,7 @@ extern "C" { #define WIN32_LEAN_AND_MEAN #include +#include static WCHAR* mz_utf8z_to_widechar(const char* str) { @@ -3149,11 +3150,13 @@ static FILE *mz_fopen(const char *pFilename, const char *pMode) { WCHAR* wFilename = mz_utf8z_to_widechar(pFilename); WCHAR* wMode = mz_utf8z_to_widechar(pMode); - FILE* pFile = NULL; - errno_t err = _wfopen_s(&pFile, wFilename, wMode); + /* + Must use _wfsopen with _SH_DENYNO on Windows, to open opened temp files. + */ + FILE* pFile = _wfsopen(wFilename, wMode, _SH_DENYNO); free(wFilename); free(wMode); - return err ? NULL : pFile; + return pFile; } static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py index 7b24208a2c512..cda24724575e2 100644 --- a/torch/_inductor/codecache.py +++ b/torch/_inductor/codecache.py @@ -390,7 +390,14 @@ def __enter__(self) -> _TemporaryFileWrapper[Any]: def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: self.temp_file.close() - os.unlink(self.temp_file.name) + try: + os.unlink(self.temp_file.name) + except OSError as e: + if _IS_WINDOWS: + # On Windows, some case temp file is opened and fail to unlink. Need to ignore it. + pass + else: + raise e def write( From fa1d409e83af93425a2672d62e134e8f20c5ccc0 Mon Sep 17 00:00:00 2001 From: "Deng, Daisy" Date: Thu, 11 Sep 2025 06:44:26 +0000 Subject: [PATCH 089/693] [2/N]Port several test files under test/distributed to Intel GPU (#159473) For https://github.com/pytorch/pytorch/issues/114850, we will port distributed tests to Intel GPU. This PR will work on some test files under test/distributed. We could enable Intel GPU with following methods and try the best to keep the original code styles: - instantiate_device_type_tests() - use "torch.accelerator.current_accelerator()" to determine the accelerator backend - use requires_accelerator_dist_backend to allow both nccl and xccl test - enabled XPU for some test path - Change the hardcoded world_size according to device_count. 
- Unify some common code under torch/testing/_internal for multiple backend, for example: Added xpu for Backend.backend_capability and dist.Backend.register_backend() Pull Request resolved: https://github.com/pytorch/pytorch/pull/159473 Approved by: https://github.com/guangyey, https://github.com/d4l3k --- test/distributed/test_c10d_common.py | 67 ++++++--- .../test_c10d_functional_native.py | 77 +++++----- test/distributed/test_device_mesh.py | 41 ++++-- test/distributed/test_dynamo_distributed.py | 138 ++++++++++++------ test/distributed/test_inductor_collectives.py | 138 +++++++++++------- test/distributed/test_store.py | 14 +- test/distributions/test_distributions.py | 39 +++-- test/inductor/test_snode_runtime.py | 18 --- torch/distributed/distributed_c10d.py | 6 +- torch/testing/_internal/common_distributed.py | 29 ++-- .../testing/_internal/distributed/fake_pg.py | 2 +- 11 files changed, 336 insertions(+), 233 deletions(-) diff --git a/test/distributed/test_c10d_common.py b/test/distributed/test_c10d_common.py index 1857feffd9394..89afc369fe149 100644 --- a/test/distributed/test_c10d_common.py +++ b/test/distributed/test_c10d_common.py @@ -43,6 +43,7 @@ retry_on_connect_failures, run_tests, TEST_WITH_DEV_DBG_ASAN, + TEST_XPU, TestCase, ) from torch.utils.checkpoint import checkpoint @@ -63,6 +64,8 @@ torch.backends.cuda.matmul.allow_tf32 = False +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + def gpus_for_rank(world_size): """Multigpu tests are designed to simulate the multi nodes with multi @@ -70,8 +73,9 @@ def gpus_for_rank(world_size): On a single node, all visible GPUs are evenly divided to subsets, each process only uses a subset. """ - visible_devices = list(range(torch.cuda.device_count())) - gpus_per_process = torch.cuda.device_count() // world_size + device_count = torch.accelerator.device_count() + visible_devices = list(range(device_count)) + gpus_per_process = device_count // world_size gpus_for_rank = [] for rank in range(world_size): gpus_for_rank.append( @@ -401,7 +405,7 @@ def _prepare_multi_device_module( gradient_as_bucket_view=gradient_as_bucket_view, ) - input = torch.randn(global_batch_size, 2).cuda(devices[0]) + input = torch.randn(global_batch_size, 2).to(devices[0]) target = torch.randn(global_batch_size, 4) return model, ddp_model, input, target @@ -435,10 +439,10 @@ def _test_ddp_checkpointing( allow_none_grads=False, ): # to reproduce the same training results - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) torch.manual_seed(31415) - model = copy.deepcopy(input_model).cuda() - ddp_model = copy.deepcopy(input_model).cuda() + model = copy.deepcopy(input_model).to(device_type) + ddp_model = copy.deepcopy(input_model).to(device_type) ddp_model = nn.parallel.DistributedDataParallel( ddp_model, bucket_cap_mb=1, @@ -554,8 +558,8 @@ def __init__(self, use_reentrant=True): def _prepare_dummy_data(self): ddp_bs = 16 bs = ddp_bs * self.world_size - input = torch.rand((bs, 20), device="cuda", requires_grad=True) - target = torch.randn((bs, 20), device="cuda") + input = torch.rand((bs, 20), device=device_type, requires_grad=True) + target = torch.randn((bs, 20), device=device_type) offset = self.rank * ddp_bs ddp_input = input[offset : offset + ddp_bs] ddp_target = target[offset : offset + ddp_bs] @@ -715,7 +719,7 @@ def test_ddp_checkpointing_weight_sharing(self, use_reentrant): Test that checkpointing with weight sharing works. 
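The porting approach summarized in this patch boils down to a device-agnostic pattern used throughout the hunks below. A minimal sketch, assuming a PyTorch build recent enough to expose the `torch.accelerator` API that these tests rely on:

```python
# Sketch of the device-agnostic pattern applied across the distributed tests:
# resolve the accelerator type once instead of hard-coding "cuda", and derive
# per-rank device assignments from the visible device count.
import torch

# Falls back to "cpu" on machines without any accelerator.
device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu"


def gpus_for_rank(world_size: int) -> list[list[int]]:
    # Evenly split the visible accelerator devices across ranks, mirroring the
    # updated helper in test_c10d_common.py.
    device_count = torch.accelerator.device_count()
    visible_devices = list(range(device_count))
    per_rank = device_count // world_size
    return [
        visible_devices[rank * per_rank : (rank + 1) * per_rank]
        for rank in range(world_size)
    ]


if __name__ == "__main__":
    t = torch.ones(2, 2, device=device_type)  # works for cuda, xpu, or cpu
    print(device_type, gpus_for_rank(world_size=1))
```

The backend choice follows the same pattern: `dist.get_default_backend_for_device(device_type)` resolves the appropriate backend (nccl for CUDA, xccl for XPU), which is what the `requires_accelerator_dist_backend(["nccl", "xccl"])` decorator gates on in the hunks below.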
""" process_group = self._get_process_group() - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) for use_bucket_view, static_graph in product((False, True), (False, True)): torch.manual_seed(31415) l1 = nn.Linear(20, 20) @@ -738,7 +742,7 @@ def test_ddp_checkpointing_twice_weight_sharing(self): same layer twice and having weights shared across layers. """ process_group = self._get_process_group() - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) for use_bucket_view in (True, False): self._test_ddp_checkpointing( self.CheckpointTwiceModuleWeightSharing(), @@ -1162,7 +1166,7 @@ def _test_sequence_num_incremented(self, process_group, ranks): # Verify sequence numbers are appropriately incremented for i in range(10): - t = torch.ones(1, device=torch.cuda.current_device()) + t = torch.ones(1, device=device_type) dist.all_reduce(t, group=process_group) if not c10d._rank_not_in_group(process_group): seq_num = self._verify_sequence_number_across_pg( @@ -1193,7 +1197,7 @@ def _test_sequence_num_incremented(self, process_group, ranks): self.assertEqual(rank_to_seq_num[0] + 1, rank_to_seq_num[1]) def _test_sequence_num_incremented_default_group(self, backend_name): - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) store = dist.FileStore(self.file_name, self.world_size) dist.init_process_group( backend_name, @@ -1207,7 +1211,7 @@ def _test_sequence_num_incremented_default_group(self, backend_name): ) def _test_sequence_num_incremented_subgroup(self, backend_name): - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) store = dist.FileStore(self.file_name, self.world_size) dist.init_process_group( backend_name, @@ -1262,8 +1266,8 @@ def _test_warn_not_in_group(self, backend): in_group_ranks = list(filter(lambda x: x % 2 == 0, range(self.world_size))) group = dist.new_group(in_group_ranks) - x = torch.zeros(2, 2).cuda(self.rank) - xs = [torch.zeros(2, 2).cuda(self.rank) for _ in range(len(in_group_ranks))] + x = torch.zeros(2, 2).to(self.rank) + xs = [torch.zeros(2, 2).to(self.rank) for _ in range(len(in_group_ranks))] if self.rank not in in_group_ranks: msg = ".*{}.*does not belong to.*" with self.assertWarnsOnceRegex(UserWarning, msg.format("all_gather")): @@ -1392,7 +1396,7 @@ def _test_bool_tensors(self, backend): rank=self.rank, store=store, ) - device = "cuda" if backend == "nccl" else "cpu" + device = "cuda" if backend == "nccl" else "xpu" if backend == "xccl" else "cpu" # test alltoall_base tensor = torch.tensor([1, 0, 0, 1], dtype=torch.bool, device=device) zeros = torch.tensor([0, 0, 0, 0], dtype=torch.bool, device=device) @@ -1574,8 +1578,8 @@ def test_debug_level(self): class DummyWork(dist._Work): def wait(self, timeout=5.0): - if torch.cuda.is_available(): - torch.cuda.current_stream().synchronize() + if torch.accelerator.is_available(): + torch.accelerator.current_stream().synchronize() return True @@ -1790,6 +1794,18 @@ def test_backend_config(self): ("cpu:gloo,cuda:nccl", "cpu:gloo,cuda:nccl"), ] + if TEST_XPU: + # Override backend_config_strings_and_expected_values for Intel GPU. 
+ backend_config_strings_and_expected_values[4:10] = [ + (dist.Backend.DUMMY, "cpu:dummy,cuda:dummy,xpu:dummy"), + ("DUMMY", "cpu:dummy,cuda:dummy,xpu:dummy"), + ("dummy", "cpu:dummy,cuda:dummy,xpu:dummy"), + ("cpu:dummy,xpu:dummy", "cpu:dummy,xpu:dummy"), + ("cpu:dummy,xpu:xccl", "cpu:dummy,xpu:xccl"), + ("cpu:gloo,xpu:dummy", "cpu:gloo,xpu:dummy"), + ("cpu:gloo,xpu:xccl", "cpu:gloo,xpu:xccl"), + ] + for config_str, expected_value in backend_config_strings_and_expected_values: with self.subTest(config_str): # ensures these configs strings are valid and no ValueError is raised @@ -1800,6 +1816,8 @@ def test_backend_config(self): invalid_backend_config_strings = [ "cpu:gloo,cuda:nccl,", # trailing comma "cpu:gloo,cuda:nccl,cpu:dummy", # duplicate device + "cpu:gloo,xpu:xccl,", # trailing comma + "cpu:gloo,xpu:xccl,cpu:dummy", # duplicate device ] for config_str in invalid_backend_config_strings: with self.subTest(config_str): @@ -1814,7 +1832,7 @@ def test_init_process_group_with_multiple_backends(self): os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = "6789" dist.init_process_group( - "cpu:dummy,cuda:dummy", rank=self.rank, world_size=self.world_size + "cpu:dummy,cuda:dummy,xpu:dummy", rank=self.rank, world_size=self.world_size ) # test all_gather @@ -2053,7 +2071,7 @@ def _call_collective_with_varying_tensors(self, backend, collective, *args): # correctly dispatched # TODO: this will be updated in the future to not be backend specific - device = "cuda" if backend == "nccl" else "cpu" + device = "cuda" if backend == "nccl" else "xpu" if backend == "xccl" else "cpu" # ensure supported devices (cpu, cuda) succeeds during dispatch call tensor = torch.zeros(2, 2, device=torch.device(device)) # multi tensor collectives @@ -2119,7 +2137,7 @@ def _test_all_to_all_single(self, backend): rank=self.rank, store=store, ) - device = "cuda" if backend == "nccl" else "cpu" + device = "cuda" if backend == "nccl" else "xpu" if backend == "xccl" else "cpu" # test alltoall_base input_tensor = torch.ones(2, 2, device=torch.device(device)) output_tensor = torch.zeros(2, 2, device=torch.device(device)) @@ -2251,8 +2269,9 @@ def testNodeLocalRank(self): if __name__ == "__main__": - assert not torch.cuda._initialized, ( - "test_distributed must not have initialized CUDA context on main process" - ) + if device_type != "cpu": + assert not torch.get_device_module()._initialized, ( + "test_distributed must not have initialized {device_type} context on main process" + ) run_tests() diff --git a/test/distributed/test_c10d_functional_native.py b/test/distributed/test_c10d_functional_native.py index bafc781b591c6..930f034759395 100644 --- a/test/distributed/test_c10d_functional_native.py +++ b/test/distributed/test_c10d_functional_native.py @@ -24,7 +24,7 @@ from torch.testing._internal.common_cuda import SM90OrLater from torch.testing._internal.common_distributed import ( MultiProcessTestCase, - requires_nccl, + requires_accelerator_dist_backend, skip_if_lt_x_gpu, ) from torch.testing._internal.common_utils import ( # type: ignore[attr-defined] @@ -59,7 +59,7 @@ def load_test_module(name): sys.exit(0) -@requires_nccl() +@requires_accelerator_dist_backend(["nccl", "xccl"]) class TestWithNCCL(MultiProcessTestCase): def setUp(self) -> None: super().setUp() @@ -75,13 +75,15 @@ def ranks(self) -> list[int]: @property def device(self) -> torch.device: - return torch.device(f"cuda:{self.rank}") + return torch.device(self.rank) def _init_process_group(self) -> None: - torch.cuda.set_device(self.device) + 
torch.accelerator.set_device_index(self.rank) store = dist.FileStore(self.file_name, self.world_size) + backend = dist.get_default_backend_for_device(self.device.type) + dist.init_process_group( - backend="nccl", + backend=backend, world_size=self.world_size, rank=self.rank, store=store, @@ -273,7 +275,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ) # check memory leak for i in range(1, 10): - mem_usage[i] = torch.cuda.max_memory_allocated() + mem_usage[i] = torch.accelerator.max_memory_allocated() compiled(arg) assert mem_usage[9] == mem_usage[8] @@ -370,14 +372,16 @@ def test_reduce_scatter_tensor_coalesced(self) -> None: @skip_if_lt_x_gpu(2) def test_all_to_all_single(self) -> None: self._init_process_group() - torch.cuda.set_device(self.device) + torch.accelerator.set_device_index(self.rank) torch.manual_seed(42) send_sz_matrix = torch.randint(0, 20, (self.world_size, self.world_size)) input_split_sizes = send_sz_matrix[self.rank].tolist() output_split_sizes = send_sz_matrix[:, self.rank].tolist() - input = torch.full((sum(input_split_sizes),), float(self.rank)).cuda() + input = torch.full((sum(input_split_sizes),), float(self.rank)).to( + self.device.type + ) output = torch.ops._c10d_functional.all_to_all_single( input, @@ -388,7 +392,7 @@ def test_all_to_all_single(self) -> None: output = torch.ops._c10d_functional.wait_tensor(output) expect = torch.cat( [ - torch.full((sz,), float(rank)).cuda() + torch.full((sz,), float(rank)).to(self.device.type) for rank, sz in enumerate(output_split_sizes) ] ) @@ -464,7 +468,7 @@ def test_unwaited(self) -> None: @fresh_cache() def test_threading(self): self._init_process_group() - device = torch.device(f"cuda:{self.rank}") + device = self.device def func(arg: torch.Tensor) -> torch.Tensor: buf0 = arg + 42 @@ -546,9 +550,9 @@ def fp8_rowwise_backward(in_, w, out_grad): return in_grad, w_grad m, n, k = 128, 256, 64 - in_ = torch.randn((m, k), device="cuda", dtype=torch.bfloat16) - w = torch.randn((n, k), device="cuda", dtype=torch.bfloat16) - out_grad = torch.randn((m, n), device="cuda", dtype=torch.bfloat16) + in_ = torch.randn((m, k), device=self.device.type, dtype=torch.bfloat16) + w = torch.randn((n, k), device=self.device.type, dtype=torch.bfloat16) + out_grad = torch.randn((m, n), device=self.device.type, dtype=torch.bfloat16) eager_in_grad, eager_w_grad = fp8_rowwise_backward(in_, w, out_grad) compile_in_grad, compile_w_grad = torch.compile(fp8_rowwise_backward)( @@ -777,7 +781,8 @@ def setUp(self): self.rank = 0 self.world_size = 2 - torch.cuda.set_device("cuda:0") + torch.accelerator.set_device_index(0) + self.device = torch.accelerator.current_accelerator() store = FakeStore() dist.init_process_group( @@ -803,7 +808,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ar1 = funcol.wait_tensor(ar1) return ar0, ar1 - arg = torch.rand(4, 4, device="cuda") + arg = torch.rand(4, 4, device=self.device) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) @@ -836,7 +841,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.cuda.synchronize() + torch.accelerator.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -851,7 +856,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: ar1 = [funcol.wait_tensor(out) for out in ar1] return ar0, ar1 - args = [torch.rand(4, 4, device="cuda") for _ in range(2)] + args = [torch.rand(4, 4, device=self.device.type) for _ in range(2)] compiled = torch.compile(func) code = 
run_and_get_triton_code(compiled, args) buf0, buf1, buf2, buf3 = find_buffer_assignments(code) @@ -881,7 +886,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: # Test aoti out = AOTIRunnerUtil.run(func, (args,)) # noqa: F841 - torch.cuda.synchronize() + torch.accelerator.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -892,7 +897,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ar0 = funcol.wait_tensor(ar0) return ar0 - arg = torch.rand(4, 4, device="cuda") + arg = torch.rand(4, 4, device=self.device.type) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) @@ -917,7 +922,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Expect allocation return ar0 - arg = torch.rand(4, 4, device="cuda").T + arg = torch.rand(4, 4, device=self.device.type).T compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) @@ -948,7 +953,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: buf2 = torch.mm(arg, buf1) return buf1, buf2 - arg = torch.rand(4, 4, device="cuda") + arg = torch.rand(4, 4, device=self.device.type) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) buf0, buf1 = find_buffer_assignments(code) @@ -978,7 +983,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ag0 = funcol.wait_tensor(ag0) return ag0 - arg = torch.rand(4, 4, device="cuda") + arg = torch.rand(4, 4, device=self.device.type) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) ( @@ -995,7 +1000,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.cuda.synchronize() + torch.accelerator.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1005,7 +1010,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: ag0 = [funcol.wait_tensor(out) for out in ag0] return ag0 - args = [torch.rand(4, 4, device="cuda") for _ in range(4)] + args = [torch.rand(4, 4, device=self.device.type) for _ in range(4)] compiled = torch.compile(func) code = run_and_get_triton_code(compiled, args) ( @@ -1029,7 +1034,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: # Test aoti out = AOTIRunnerUtil.run(func, (args,)) # noqa: F841 - torch.cuda.synchronize() + torch.accelerator.synchronize() @unittest.skipIf(not HAS_GPU, "This is a GPU test!") @fresh_cache() @@ -1039,7 +1044,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: return funcol.wait_tensor(t) # Test aoti - arg = torch.rand(4, 4, device="cuda") + arg = torch.rand(4, 4, device=self.device.type) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) ( @@ -1051,7 +1056,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.cuda.synchronize() + torch.accelerator.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1061,7 +1066,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: rs0 = funcol.wait_tensor(rs0) return rs0 - arg = torch.rand(4, 4, device="cuda") + arg = torch.rand(4, 4, device=self.device.type) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) ( @@ -1077,7 +1082,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.cuda.synchronize() + torch.accelerator.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1089,7 +1094,7 @@ def func(args: list[torch.Tensor]) -> 
torch.Tensor: rs0 = [funcol.wait_tensor(out) for out in rs0] return rs0 - args = [torch.rand(4, 4, device="cuda") for _ in range(4)] + args = [torch.rand(4, 4, device=self.device.type) for _ in range(4)] compiled = torch.compile(func) code = run_and_get_triton_code(compiled, args) ( @@ -1113,7 +1118,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (args,)) - torch.cuda.synchronize() + torch.accelerator.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1142,7 +1147,9 @@ def func( input_split_sizes = send_sz_matrix[self.rank] output_split_sizes = send_sz_matrix[:, self.rank].contiguous() - input = torch.full((input_split_sizes.sum().item(),), float(self.rank)).cuda() + input = torch.full((input_split_sizes.sum().item(),), float(self.rank)).to( + self.device.type + ) with torch._dynamo.config.patch( dynamic_shapes=True, @@ -1176,7 +1183,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: br1 = funcol.wait_tensor(br1) return br0, br1 - arg = torch.rand(4, 4, device="cuda") + arg = torch.rand(4, 4, device=self.device.type) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) @@ -1199,7 +1206,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.cuda.synchronize() + torch.accelerator.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1214,7 +1221,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ar1 = funcol.wait_tensor(ar1) return ar0, ar1 - arg = torch.rand(4, 4, device="cuda") + arg = torch.rand(4, 4, device=self.device.type) compiled = torch.compile(func, fullgraph=True) code = run_and_get_triton_code(compiled, arg) diff --git a/test/distributed/test_device_mesh.py b/test/distributed/test_device_mesh.py index 98557c9fe941a..29b66ade63efd 100644 --- a/test/distributed/test_device_mesh.py +++ b/test/distributed/test_device_mesh.py @@ -1,6 +1,7 @@ # Copyright (c) Meta Platforms, Inc. 
and affiliates # Owner(s): ["oncall: distributed"] import os +import unittest import torch import torch.distributed as dist @@ -26,7 +27,7 @@ ) from torch.distributed.tensor.placement_types import _Partial, Shard from torch.testing._internal.common_distributed import skip_if_lt_x_gpu -from torch.testing._internal.common_utils import run_tests +from torch.testing._internal.common_utils import run_tests, TEST_XPU from torch.testing._internal.distributed._tensor.common_dtensor import ( DTensorTestBase, with_comms, @@ -35,6 +36,10 @@ from torch.utils._typing_utils import not_none +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" +device_count = torch.accelerator.device_count() + + def _set_env_var(addr="localhost", port="25364", world_size=1, rank=0, local_rank=-1): os.environ["MASTER_ADDR"] = addr os.environ["MASTER_PORT"] = port @@ -44,6 +49,7 @@ def _set_env_var(addr="localhost", port="25364", world_size=1, rank=0, local_ran os.environ["LOCAL_RANK"] = f"{local_rank}" +@unittest.skipIf(TEST_XPU, "XPU does not support gloo backend.") class DeviceMeshTestGlooBackend(DTensorTestBase): @property def backend(self): @@ -73,14 +79,16 @@ def test_manual_set_device(self): # Set the device on each process before DeviceMesh constructor, # and device to be different than the default world rank - torch.cuda.set_device((self.rank + 2) % self.world_size) + torch.accelerator.set_device_index((self.rank + 2) % self.world_size) _set_env_var(world_size=self.world_size, rank=self.rank) DeviceMesh(self.device_type, mesh_tensor) self.assertTrue(is_initialized()) # check that the device is set to the correct device # and respect the previous set_device calls - self.assertEqual(torch.cuda.current_device(), (self.rank + 2) % self.world_size) + self.assertEqual( + torch.accelerator.current_device_idx(), (self.rank + 2) % self.world_size + ) self.destroy_pg() @skip_if_lt_x_gpu(4) @@ -101,7 +109,7 @@ def test_auto_set_device_from_local_rank(self): # check that the device is set to the correct device # and respect the LOCAL_RANK env var - self.assertEqual(torch.cuda.current_device(), local_rank) + self.assertEqual(torch.accelerator.current_device_idx(), local_rank) self.destroy_pg() @skip_if_lt_x_gpu(4) @@ -120,7 +128,7 @@ def test_auto_set_device_from_heuristic(self): self.assertTrue(is_initialized()) # check that the device is set to the correct device - self.assertEqual(torch.cuda.current_device(), self.rank) + self.assertEqual(torch.accelerator.current_device_idx(), self.rank) self.destroy_pg() @@ -222,7 +230,7 @@ def test_get_local_rank(self): @with_comms def test_device_mesh_2d(self): mesh_tensor = torch.arange(4).reshape(2, 2) - # construct a cuda device mesh + # construct a device mesh for self.device_type mesh = DeviceMesh(self.device_type, mesh_tensor) # check all dim groups @@ -257,10 +265,10 @@ def test_device_mesh_init_backend(self): # we call init_backend we should make sure the default pg already created self.assertEqual(mesh.get_coordinate(), [5]) + @unittest.skipIf(not torch.accelerator.is_available(), "No accelerator available!") def test_fake_pg_device_mesh(self): fake_store = FakeStore() init_process_group("fake", store=fake_store, rank=0, world_size=self.world_size) - device_type = "cuda" if torch.cuda.is_available() else "cpu" mesh = DeviceMesh(device_type, torch.arange(self.world_size)) local_tensor = torch.randn(2, 8) @@ -300,7 +308,7 @@ def test_from_group_with_invalid_mesh(self): regex = r"Invalid mesh \[\[0, 1\], \[2, 3\]\] for ProcessGroup with ranks \[0, 
1, 2, 3\]" with self.assertRaisesRegex(ValueError, regex): DeviceMesh.from_group( - global_pg, "cuda", invalid_mesh, mesh_dim_names=("dim0", "dim1") + global_pg, device_type, invalid_mesh, mesh_dim_names=("dim0", "dim1") ) device_mesh = init_device_mesh(self.device_type, (2, 2)) @@ -320,12 +328,11 @@ def test_raises_invalid_device_type(self): # test init_device_mesh with an invalid device type that contains a GPU index mesh_shape = (2, self.world_size // 2) init_device_mesh( - "cuda:0", mesh_shape=mesh_shape, mesh_dim_names=("dp", "tp") + f"{device_type}:0", mesh_shape=mesh_shape, mesh_dim_names=("dp", "tp") ) @with_comms def test_set_mesh_dim_group_options(self): - device_type = "cuda" if torch.cuda.is_available() else "cpu" _mesh_resources._set_mesh_dim_group_options(1, "fake", None) mesh_tensor = torch.arange(4).reshape(2, 2) @@ -341,7 +348,7 @@ def world_size(self): @with_comms def test_device_mesh_nd(self): - # construct a cuda device mesh + # construct a device mesh for self.device_type mesh_tensor = torch.arange(8).reshape(2, 2, 2) mesh = DeviceMesh(self.device_type, mesh_tensor) @@ -710,7 +717,9 @@ def test_raises_invalid_mesh_dim_name(self): with self.assertRaisesRegex(KeyError, "Invalid mesh_dim_name"): mesh_dim_names = ("DP", "TP") mesh = init_device_mesh( - self.device_type, (2, 4), mesh_dim_names=mesh_dim_names + self.device_type, + (2, 4), + mesh_dim_names=mesh_dim_names, ) mesh[child_mesh_dim_name] @@ -938,7 +947,9 @@ def world_size(self): @with_comms def test_get_root_mesh(self): mesh_3d = init_device_mesh( - self.device_type, (2, 2, 2), mesh_dim_names=("dp", "cp", "tp") + self.device_type, + (2, 2, 2), + mesh_dim_names=("dp", "cp", "tp"), ) dp_cp_mesh = mesh_3d["dp", "cp"] @@ -986,7 +997,9 @@ def test_get_mesh_dim_by_name(self): @with_comms def test_get_all_submeshes(self): mesh_2d = init_device_mesh( - self.device_type, (2, 4), mesh_dim_names=("replicate", "shard") + self.device_type, + (2, 4), + mesh_dim_names=("replicate", "shard"), ) all_submeshes = _mesh_resources._get_all_submeshes(mesh_2d, "replicate") self.assertEqual(len(all_submeshes), 4) diff --git a/test/distributed/test_dynamo_distributed.py b/test/distributed/test_dynamo_distributed.py index d3436bbe47548..af07e50435a81 100644 --- a/test/distributed/test_dynamo_distributed.py +++ b/test/distributed/test_dynamo_distributed.py @@ -43,11 +43,12 @@ DynamoDistributedMultiProcTestCase, DynamoDistributedSingleProcTestCase, import_transformers_or_skip, - requires_nccl, + requires_accelerator_dist_backend, skip_if_lt_x_gpu, ) -from torch.testing._internal.common_utils import requires_cuda +from torch.testing._internal.common_utils import skipIfXpu from torch.testing._internal.inductor_utils import HAS_GPU +from torch.testing._internal.triton_utils import requires_cuda_and_triton def reset_rng_state(): @@ -270,7 +271,15 @@ def get_hf_bert(rank): except ImportError as e: raise unittest.SkipTest("Unable to import transformers") from e - batch_size, max_length, config, device = 4, 512, BertConfig(), f"cuda:{rank}" + device_type = ( + acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + ) + batch_size, max_length, config, device = ( + 4, + 512, + BertConfig(), + f"{device_type}:{rank}", + ) model = AutoModelForMaskedLM.from_config(config).to(device) input_ids = torch.randint(0, config.vocab_size, (batch_size, max_length)).to(device) decoder_ids = torch.randint(0, config.vocab_size, (batch_size, max_length)).to( @@ -550,8 +559,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Are these tests 
failing? Check and see if TestFakeDistributedSingleProc has a # single process version; if it's just a problem in the Dynamo distributed -# optimizer, you should be able to repro it single process! -@requires_nccl() +# # optimizer, you should be able to repro it single process! +@requires_accelerator_dist_backend(["nccl", "xccl"]) class TestMultiProc(DynamoDistributedMultiProcTestCase): """ Note: MultiProcTestCase spawns processes per test and is slow. @@ -559,12 +568,16 @@ class TestMultiProc(DynamoDistributedMultiProcTestCase): sparingly for integration tests. """ + device_type = ( + acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + ) + @skip_if_lt_x_gpu(2) @config.patch(optimize_ddp=False, enable_compiler_collectives=True) def test_ddp_baseline_aot_eager_multiprocess(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): self.assertFalse(config.optimize_ddp) - m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") + m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") m = DDP(m, device_ids=[self.rank]) m = torch.compile(m, backend="aot_eager") outputs = m(inputs) @@ -632,7 +645,7 @@ def forward(self, inp): with _dynamo_dist_per_rank_init(self.rank, self.world_size): self.assertFalse(config.optimize_ddp) - model = MyModel().to(device="cuda") + model = MyModel().to(device=self.device_type) # Activation checkpointing for Linear layers. non_reentrant_wrapper = functools.partial( @@ -647,7 +660,7 @@ def forward(self, inp): ) model = DDP(model) - x = torch.randn(10, 64).cuda() + x = torch.randn(10, 64).to(self.device_type) correct_outputs = model(x) opt_model = torch.compile(model) @@ -659,14 +672,14 @@ def forward(self, inp): def test_fsdp_aot_eager(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): # Test with basic FSDP wrapping (outer wrap around whole model) - m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") + m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="aot_eager") outputs = fsdp_m(inputs) self.assertTrue(same(correct_outputs, outputs)) # Test with recursive wrapping, nested FSDP around each Linear - m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") + m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") fsdp_m = FSDP( m, auto_wrap_policy=functools.partial( @@ -680,6 +693,7 @@ def test_fsdp_aot_eager(self): @skip_if_lt_x_gpu(2) @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") + @requires_cuda_and_triton def test_ddp_optimizer_cudagraph(self): class Net(nn.Module): def __init__(self): @@ -730,7 +744,9 @@ def test_fsdp_setattr(self): from torch._dynamo.utils import counters counters.clear() - m, inputs, correct_outputs = get_mutating_model(f"cuda:{self.rank}") + m, inputs, correct_outputs = get_mutating_model( + f"{self.device_type}:{self.rank}" + ) fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="eager", fullgraph=False) outputs = fsdp_m(inputs) @@ -748,7 +764,9 @@ def test_fsdp_unspecialized_forced_getattr_no_inline(self): from torch._dynamo.utils import counters counters.clear() - m, inputs, correct_outputs = get_forced_getattr_module(f"cuda:{self.rank}") + m, inputs, correct_outputs = get_forced_getattr_module( + f"{self.device_type}:{self.rank}" + ) fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="eager", fullgraph=False) outputs = fsdp_m(inputs) @@ -762,7 +780,9 @@ def 
test_fsdp_unspecialized_forced_getattr_inline(self): from torch._dynamo.utils import counters counters.clear() - m, inputs, correct_outputs = get_forced_getattr_module(f"cuda:{self.rank}") + m, inputs, correct_outputs = get_forced_getattr_module( + f"{self.device_type}:{self.rank}" + ) fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="eager", fullgraph=False) outputs = fsdp_m(inputs) @@ -774,14 +794,14 @@ def test_fsdp_unspecialized_forced_getattr_inline(self): def test_fsdp_inductor(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): # Test with basic FSDP wrapping (outer wrap around whole model) - m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") + m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="inductor") outputs = fsdp_m(inputs) self.assertTrue(same(correct_outputs, outputs)) # Test with recursive wrapping, nested FSDP around each Linear - m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") + m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") fsdp_m = FSDP( m, auto_wrap_policy=functools.partial( @@ -799,7 +819,7 @@ def test_fsdp_inductor(self): def test_fsdp_activation_checkpointing(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): model, inputs = get_toy_model_for_activation_checkpointing( - f"cuda:{self.rank}" + f"{self.device_type}:{self.rank}" ) is_inner = lambda module: isinstance(module, ToyInnerModel) # noqa: E731 wrap_policy = functools.partial(lambda_auto_wrap_policy, lambda_fn=is_inner) @@ -961,7 +981,7 @@ def test_compiler_collectives_automatic_dynamic_scalar(self): torch._dynamo.utils.clear_compilation_metrics() # TODO: This should be possible to do inside the function, but - device = f"cuda:{self.rank}" + device = f"{self.device_type}:{self.rank}" @torch.compile() def f(x, y): @@ -1181,7 +1201,7 @@ def test_get_pg_attr(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): pg = dist.distributed_c10d._get_default_group() - device = f"cuda:{self.rank}" + device = f"{self.device_type}:{self.rank}" @torch.compile(fullgraph=True) def f(x): @@ -1196,6 +1216,7 @@ def f(x): pg = dist.distributed_c10d.GroupMember.NON_GROUP_MEMBER self.assertEqual(f(x), x + 1) + @skipIfXpu # ProcessGroupXCCL doesn't support _set_default_timeout yet. @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @patch.object(torch._inductor.config, "fx_graph_cache", False) @patch.object(torch._inductor.config, "fx_graph_remote_cache", False) @@ -1205,7 +1226,7 @@ def test_asymmetric_compilation(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): torch._dynamo.utils.clear_compilation_metrics() - device = f"cuda:{self.rank}" + device = f"{self.device_type}:{self.rank}" pg = dist.distributed_c10d._get_default_group() @@ -1238,7 +1259,7 @@ def f(x): w = pg.allreduce(x) w.wait() - torch.cuda.synchronize(device) + torch.accelerator.synchronize(device) metrics = torch._dynamo.utils.get_compilation_metrics() # Number of compiles same on all nodes @@ -1247,6 +1268,7 @@ def f(x): for r in res[1:]: self.assertEqual(res[0], r) + @skipIfXpu # ProcessGroupXCCL doesn't support _set_default_timeout yet. 
@unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @patch.object(torch._inductor.config, "fx_graph_cache", True) @patch.object(torch._inductor.config, "fx_graph_remote_cache", False) @@ -1258,7 +1280,7 @@ def test_asymmetric_compilation_with_fx_cache(self): with fresh_cache(), _dynamo_dist_per_rank_init(self.rank, self.world_size): torch._dynamo.utils.clear_compilation_metrics() - device = f"cuda:{self.rank}" + device = f"{self.device_type}:{self.rank}" pg = dist.distributed_c10d._get_default_group() @@ -1281,7 +1303,7 @@ def f(x): w = pg.allreduce(x) w.wait() - torch.cuda.synchronize(device) + torch.accelerator.synchronize(device) torch._dynamo.reset() if self.rank == 0: @@ -1298,11 +1320,11 @@ def f(x): w = pg.allreduce(x) w.wait() - torch.cuda.synchronize(device) + torch.accelerator.synchronize(device) -@requires_nccl() -@requires_cuda +@requires_accelerator_dist_backend(["nccl", "xccl"]) +@unittest.skipUnless(torch.accelerator.is_available(), "Requires accelerator") class TestSingleProc(DynamoDistributedSingleProcTestCase): """ Test harness initializes dist process group. @@ -1311,6 +1333,10 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase): Use TestMultiProc for things that really need to run on multiple nodes """ + device_type = ( + acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + ) + def get_model( self, bsz=20, in_feat=10, hidden_feat=5000, out_feat=5, ctx_manager=None ): @@ -1428,6 +1454,7 @@ def opt_fn(inputs): self.assertEqual(len(break_reasons), 4) self.assertTrue(all("DDPOptimizer" in r.reason for r in break_reasons)) + @skipIfXpu # XPU device doesn't support flex_attention yet. @patch.object(config, "optimize_ddp", True) def test_compiled_flex_attention_full_model_ddp(self): class Model(torch.nn.Module): @@ -1474,16 +1501,16 @@ def alibi_score_mod(self, score, b, h, q_idx, kv_idx): S = 512 D = 64 - device = "cuda" model = Model(S, H, D) - model.to(device) + model.to(self.device_type) model = torch.compile(model) model = DDP(model, device_ids=self.device_ids) - hidden_states = torch.randn(B, S, H * D).to(device) + hidden_states = torch.randn(B, S, H * D).to(self.device_type) model(hidden_states) - torch.cuda.synchronize() + torch.accelerator.synchronize() + @skipIfXpu # XPU device doesn't support flex_attention yet. 
@patch.object(config, "optimize_ddp", True) def test_compiled_flex_attention_local_ddp(self): class Model(torch.nn.Module): @@ -1530,15 +1557,14 @@ def alibi_score_mod(self, score, b, h, q_idx, kv_idx): S = 512 D = 64 - device = "cuda" model = Model(S, H, D) - model.to(device) + model.to(self.device_type) model = torch.compile(model) model = DDP(model, device_ids=self.device_ids) - hidden_states = torch.randn(B, S, H * D).to(device) + hidden_states = torch.randn(B, S, H * D).to(self.device_type) model(hidden_states) - torch.cuda.synchronize() + torch.accelerator.synchronize() @patch.object(config, "optimize_ddp", True) @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -1787,9 +1813,9 @@ def forward(self, x): a = torch.cos(a) return a - mod = MockModule().cuda() + mod = MockModule().to(self.device_type) mod = DDP(mod, bucket_cap_mb=1) - x = torch.randn(N, N, device="cuda", requires_grad=True) + x = torch.randn(N, N, device=self.device_type, requires_grad=True) args = (x,) backend = "aot_eager" @@ -1799,7 +1825,7 @@ def forward(self, x): def test_fsdp_orig_params_assert(self): # Test with basic FSDP wrapping (outer wrap around whole model) - m, inputs, _ = get_model(f"cuda:{self.rank}") + m, inputs, _ = get_model(f"{self.device_type}:{self.rank}") fsdp_m = FSDP(m, use_orig_params=False) # Test is that this function call does not throw an exception. fsdp_m = torch.compile(fsdp_m) @@ -1845,7 +1871,7 @@ def _(ctx): return out - device = f"cuda:{self.rank}" + device = f"{self.device_type}:{self.rank}" m = ToyModel( in_feat=10, hidden_feat=5000, @@ -1892,7 +1918,7 @@ def forward(self, inputs): torch._dynamo.reset() - device = f"cuda:{self.rank}" + device = f"{self.device_type}:{self.rank}" m = ToyModel( in_feat=10, hidden_feat=5000, @@ -1933,9 +1959,14 @@ def test_fsdp_dup_tensors_same_source(self): class DuplicateModule(nn.Module): def __init__(self) -> None: super().__init__() - self._param = torch.randn((3,), device="cuda") + device_type = ( + acc.type + if (acc := torch.accelerator.current_accelerator()) + else "cpu" + ) + self._param = torch.randn((3,), device=device_type) self._buf = torch.nn.Buffer( - torch.randn((3,), requires_grad=False, device="cuda") + torch.randn((3,), requires_grad=False, device=device_type) ) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -1948,7 +1979,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: model = DuplicateModule() fsdp_model = FSDP(copy.deepcopy(model), use_orig_params=True) fsdp_model = torch.compile(fsdp_model, backend="aot_eager") - inp = torch.randn((2, 3), device="cuda") + inp = torch.randn((2, 3), device=self.device_type) local_out = model(inp) fsdp_out = fsdp_model(inp) self.assertEqual(local_out, fsdp_out) @@ -1965,8 +1996,13 @@ def test_fsdp_dup_tensors_diff_source(self): class BufModule(nn.Module): def __init__(self) -> None: super().__init__() + device_type = ( + acc.type + if (acc := torch.accelerator.current_accelerator()) + else "cpu" + ) self._buf = nn.Buffer( - torch.randn((3,), requires_grad=False, device="cuda") + torch.randn((3,), requires_grad=False, device=device_type) ) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -1975,7 +2011,12 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class Model(nn.Module): def __init__(self) -> None: super().__init__() - self._param = nn.Parameter(torch.randn((1,), device="cuda")) + device_type = ( + acc.type + if (acc := torch.accelerator.current_accelerator()) + else "cpu" + ) + self._param = nn.Parameter(torch.randn((1,), 
device=device_type)) self._buf_module = BufModule() # Share the buffer, meaning same tensor but different source self._buf = self._buf_module._buf @@ -1992,7 +2033,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: fsdp_model = FSDP(Model(), use_orig_params=True) cnt = torch._dynamo.testing.CompileCounterWithBackend("aot_eager") fsdp_model = torch.compile(fsdp_model, backend=cnt) - inp = torch.randn((2, 3), device="cuda") + inp = torch.randn((2, 3), device=self.device_type) for _ in range(15): fsdp_model(inp) # Check for no recompiles (if there were incorrect de-dup guards, then @@ -2011,7 +2052,12 @@ def __init__(self, use_self: bool): super().__init__() self._use_self = use_self torch.manual_seed(42) # force `_param` to be deterministic - self._param = nn.Parameter(torch.randn((3,), device="cuda")) + device_type = ( + acc.type + if (acc := torch.accelerator.current_accelerator()) + else "cpu" + ) + self._param = nn.Parameter(torch.randn((3,), device=device_type)) def forward(self, x: torch.Tensor) -> torch.Tensor: if self._use_self: @@ -2026,7 +2072,7 @@ def _add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return x + y model = ModuleWithStaticMethod(False) - x = torch.randn((2, 3), device="cuda") + x = torch.randn((2, 3), device=self.device_type) ref_out = model(x) test_outs: list[torch.Tensor] = [] diff --git a/test/distributed/test_inductor_collectives.py b/test/distributed/test_inductor_collectives.py index 4c14d497234f7..a0197eb89ebc9 100644 --- a/test/distributed/test_inductor_collectives.py +++ b/test/distributed/test_inductor_collectives.py @@ -10,6 +10,7 @@ import torch._dynamo import torch._dynamo.logging import torch._dynamo.test_case +import torch.distributed as c10d # for some reason importing functional collectives after dynamo breaks collectives handling! 
import torch.distributed._functional_collectives as _functional_collectives @@ -37,14 +38,14 @@ DynamoDistributedMultiProcTestCase, DynamoDistributedSingleProcTestCase, MultiProcessTestCase, - requires_nccl, + requires_accelerator_dist_backend, skip_if_lt_x_gpu, ) from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, parametrize, - requires_cuda, skipIfRocm, + skipIfXpu, TEST_XPU, xfailIf, ) @@ -59,13 +60,15 @@ def _tolist_with_constrain_as_size(tensor): return lst -@requires_nccl() +@requires_accelerator_dist_backend(["nccl", "xccl"]) @instantiate_parametrized_tests class TestCollectivesMultiProc(DynamoDistributedMultiProcTestCase): """ Run correctness checks in multi-proc runner, mark with minimum # GPUs to run under """ + device = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + def get_world_trs(self): return { "tag": "", @@ -102,8 +105,11 @@ def compile(func, example_inputs): example, **self.get_world_trs(), ) - t = torch.randn(4, 4, device="cuda") - inputs = (t if self.rank == 0 else torch.zeros(4, 4, device="cuda"), 0) + t = torch.randn(4, 4, device=self.device) + inputs = ( + t if self.rank == 0 else torch.zeros(4, 4, device=self.device), + 0, + ) eager_out = example(*inputs) self.assertTrue(same(t, eager_out)) @@ -137,7 +143,7 @@ def compile(func, example_inputs): matmul_cat_col, **self.get_world_trs(), ) - inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 6 + inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 6 eager_out = matmul_cat_col(*inputs) compiled_matmul_cat_col = compile(matmul_cat_col, inputs) @@ -179,7 +185,7 @@ def func(x): for nelem in [1024, 2048, 4096]: # CI (Tesla T4) does not support bfloat16 compilation natively, # using float - x = torch.randn(nelem, device="cuda", dtype=torch.float) + x = torch.randn(nelem, device=self.device, dtype=torch.float) golden_out = eager_func(x) for _ in range(3): @@ -217,8 +223,8 @@ def compile(func, example_inputs): eager_func, **self.get_world_trs(), ) - eager_inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 4 - inductor_inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 2 + eager_inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 4 + inductor_inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 2 eager_out = inductor_func(eager_func(*eager_inputs), *inductor_inputs) compiled_inductor_func = compile( @@ -256,8 +262,8 @@ def compile(func, example_inputs): inductor_func, **self.get_world_trs(), ) - inductor_inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 4 - eager_inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 2 + inductor_inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 4 + eager_inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 2 eager_out = eager_func(inductor_func(*inductor_inputs), *eager_inputs) compiled_inductor_func = compile(inductor_func, inductor_inputs) @@ -270,6 +276,7 @@ def compile(func, example_inputs): @skip_if_lt_x_gpu(2) @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1728 @skipIfRocm + @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1728 def test_eager_async_allreduce_inductor_wait(self): import torch.distributed as dist from torch._inductor.utils import run_and_get_code @@ -292,7 +299,7 @@ def all_reduce_wait(work, y): # potentially compiled return y * y with _dynamo_dist_per_rank_init(self.rank, self.world_size): - x = torch.ones(12800, 12800, device="cuda") + self.rank + x = torch.ones(12800, 12800, 
device=self.device) + self.rank self.assertEqual(torch._C._distributed_c10d._get_work_registry_size(), 0) # NOTE: We run for 10 iterations each, to ensure that the GPU execution is way behind CPU @@ -363,7 +370,7 @@ def func(a, *, tag, ranks, group_size): return (e,) with _dynamo_dist_per_rank_init(self.rank, self.world_size): - inputs = torch.ones(4, 4, device="cuda") + self.rank + inputs = torch.ones(4, 4, device=self.device) + self.rank compiled = torch.compile(func) out = compiled(inputs, **self.get_world_trs()) correct = func(inputs, **self.get_world_trs()) @@ -380,7 +387,8 @@ def func(tensor, src_dst_pairs, *, tag, ranks, group_size): with _dynamo_dist_per_rank_init(self.rank, self.world_size): inputs = ( # rank0: [0., 1.], rank1: [2., 3.] - torch.arange(2, dtype=torch.float32, device="cuda") + 2 * self.rank, + torch.arange(2, dtype=torch.float32, device=self.device) + + 2 * self.rank, [1, 0], ) compiled = torch.compile(func) @@ -389,7 +397,7 @@ def func(tensor, src_dst_pairs, *, tag, ranks, group_size): self.assertTrue(same(out, correct)) # rank0: [2., 3.], rank1: [0., 1.] - expected = torch.arange(2, dtype=torch.float32, device="cuda") + 2 * ( + expected = torch.arange(2, dtype=torch.float32, device=self.device) + 2 * ( (self.rank - 1 + self.world_size) % self.world_size ) self.assertEqual(out, expected) @@ -412,9 +420,9 @@ def forward(self, x, world_size, tag, ranks, group_size): return out with _dynamo_dist_per_rank_init(self.rank, self.world_size): - model = Model().cuda() + model = Model().to(self.device) model_compiled = torch.compile(model) - inp = torch.tensor([[2, 1, 3, 0]], dtype=torch.long, device="cuda") + inp = torch.tensor([[2, 1, 3, 0]], dtype=torch.long, device=self.device) out = model_compiled(inp, self.world_size, **self.get_world_trs()) correct = model(inp, self.world_size, **self.get_world_trs()) self.assertTrue(same(out, correct)) @@ -429,7 +437,7 @@ def func(tensor, world_size): with _dynamo_dist_per_rank_init(self.rank, self.world_size): func_compiled = torch.compile(func) - inp = torch.tensor(self.rank, dtype=torch.long, device="cuda") + inp = torch.tensor(self.rank, dtype=torch.long, device=self.device) out = func_compiled(inp, self.world_size) correct = func(inp, self.world_size) self.assertTrue(same(out, correct)) @@ -451,9 +459,9 @@ def forward(self, x, world_size, tag, ranks, group_size): return out with _dynamo_dist_per_rank_init(self.rank, self.world_size): - model = Model().cuda() + model = Model().to(self.device) model_compiled = torch.compile(model) - inp = torch.tensor([[2, 1, 3, 0]], dtype=torch.long, device="cuda") + inp = torch.tensor([[2, 1, 3, 0]], dtype=torch.long, device=self.device) out = model_compiled(inp, self.world_size, **self.get_world_trs()) correct = model(inp, self.world_size, **self.get_world_trs()) self.assertTrue(same(out, correct)) @@ -482,7 +490,7 @@ def compile(func, example_inputs): example, **self.get_world_trs(), ) - inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 2 + inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 2 eager_out = example(*inputs) compiled_matmul_cat_col = compile(example, inputs) @@ -509,7 +517,7 @@ def compile(func, example_inputs): example, **self.get_world_trs(), ) - inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 2 + inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 2 eager_out = example(*inputs) compiled_fn = compile(example, inputs) @@ -563,7 +571,7 @@ def example( dtype=torch.int64, ) inputs = ( - torch.ones(int(row), 5, device="cuda") * (self.rank + 
1), + torch.ones(int(row), 5, device=self.device) * (self.rank + 1), input_split_sizes_tensor, output_split_sizes_tensor, ) @@ -732,7 +740,7 @@ def example( dtype=torch.int64, ) inputs = ( - torch.ones(int(row), 5, device="cuda", requires_grad=True) + torch.ones(int(row), 5, device=self.device, requires_grad=True) * (self.rank + 1), input_split_sizes_tensor, output_split_sizes_tensor, @@ -795,7 +803,7 @@ def example(inp, *, tag, ranks, group_size): with _dynamo_dist_per_rank_init(self.rank, self.world_size): inputs = ( - torch.ones(self.world_size, self.world_size, device="cuda") + torch.ones(self.world_size, self.world_size, device=self.device) * (self.rank + 1), ) trs = self.get_world_trs() @@ -819,8 +827,11 @@ def example(inp, *, tag, ranks, group_size): @instantiate_parametrized_tests -@requires_nccl() -@requires_cuda +@requires_accelerator_dist_backend(["nccl", "xccl"]) +@unittest.skipIf( + not torch.accelerator.is_available(), + "No accelerator is available", +) class TestCollectivesInductor(DynamoDistributedSingleProcTestCase): """ Prefer single-proc test runner for basic tests as it is easier to work with. @@ -843,7 +854,7 @@ def func(inp, *, tag, ranks, group_size): ar = torch.ops.c10d_functional.wait_tensor(ar) return ar - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) compiled = torch.compile(func) out = compiled(inputs, **self.get_world_trs()) @@ -878,7 +889,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar, other - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs()) @@ -911,7 +922,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar, y, other - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs()) @@ -952,7 +963,7 @@ def func(inp): ar = _functional_collectives.all_reduce(inp, "sum", "0") return ar - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) counter = CompileCounter() compiled = torch.compile(func, backend=counter) out = compiled(inputs) @@ -963,12 +974,13 @@ def func(inp): self.assertEqual(counter.op_count, 2) self.assertTrue(same(out, correct)) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_trace_all_gather_tensor(self): def func(inp): ar = _functional_collectives.all_gather_tensor(inp, 0, "0") return ar - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) counter = CompileCounter() compiled = torch.compile(func, backend=counter) out = compiled(inputs) @@ -979,6 +991,7 @@ def func(inp): self.assertEqual(counter.op_count, 2) self.assertTrue(same(out, correct)) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_trace_all_gather_tensor_pg(self): def func(inp, *, pg): ar = _functional_collectives.all_gather_tensor(inp, 0, pg) @@ -995,6 +1008,7 @@ def func(inp, *, pg): self.assertEqual(counter.op_count, 2) self.assertTrue(same(out, correct)) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_rewrite_dist_all_gather(self): def func(inp, out, *, pg): torch.distributed.all_gather_into_tensor( @@ -1020,6 +1034,7 @@ def func(inp, out, *, pg): assert counter.op_count == 3 assert same(outputs, correct_outputs) + 
@skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_rewrite_dist_all_gather_list(self): def func(inp, out, *, pg): torch.distributed.all_gather( @@ -1042,6 +1057,7 @@ def func(inp, out, *, pg): assert counter.frame_count == 1 assert same(outputs, correct_outputs) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_rewrite_dist_all_gather_args_match(self): # Duplicated most of the structure from test_dynamo_rewrite_dist_all_gather # except uses kwargs to ensure rewrite has matching arg names @@ -1070,6 +1086,7 @@ def func(inp, out, *, pg): assert counter.op_count == 3 assert same(outputs, correct_outputs) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_rewrite_dist_reduce_scatter(self): def func(inp, out, *, pg): torch.distributed.reduce_scatter_tensor( @@ -1237,6 +1254,7 @@ def verify(gm, _): input = torch.ones(2, device=self.device) compiled(input) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_support_collective_op_with_async_op_False(self): def func(inp, out, *, pg): # user explicitly set the attribute `async_op` to False, @@ -1296,12 +1314,13 @@ def func(inp, *, pg): assert counter.op_count == 1 assert same(outputs, correct_outputs) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_trace_reduce_scatter_tensor(self): def func(inp): ar = _functional_collectives.reduce_scatter_tensor(inp, "sum", 0, "0") return ar - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) counter = CompileCounter() compiled = torch.compile(func, backend=counter) out = compiled(inputs) @@ -1312,6 +1331,7 @@ def func(inp): self.assertEqual(counter.op_count, 2) self.assertTrue(same(out, correct)) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_trace_allgather_coalesced(self): def func(inp, *, tag, ranks, group_size): ar = torch.ops.c10d_functional.all_gather_into_tensor_coalesced( @@ -1319,7 +1339,10 @@ def func(inp, *, tag, ranks, group_size): ) return ar - inputs = [torch.ones(4, 4, device="cuda"), torch.ones(6, 6, device="cuda")] + inputs = [ + torch.ones(4, 4, device=self.device), + torch.ones(6, 6, device=self.device), + ] counter = CompileCounter() compiled = torch.compile(func, backend=counter) out = compiled(inputs, **self.get_world_trs()) @@ -1339,7 +1362,7 @@ def func(inp): ar = _functional_collectives.all_reduce(inp, "sum", "0") return ar - input = torch.ones(4, 4, device="cuda", requires_grad=True) + input = torch.ones(4, 4, device=self.device, requires_grad=True) compiled = torch.compile( func, backend="aot_eager" ) # inductor bug with single-op allreduce graph @@ -1357,6 +1380,7 @@ def test_meta(self): out = torch.ops.c10d_functional.all_reduce(x, "sum", **self.get_world_trs()) self.assertEqual(x.size(), out.size()) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @torch._inductor.config.patch({"debug": True, "triton.descriptive_names": False}) def test_inductor_all_gather_coalesced(self): @@ -1376,7 +1400,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar0, y, other, ar1 - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs()) @@ -1403,6 +1427,7 @@ def func(inp, *, tag, ranks, group_size): correct = 
func(inputs, **self.get_world_trs()) assert same(out, correct), f"{out} va {correct}" + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @torch._inductor.config.patch({"debug": True, "triton.descriptive_names": False}) def test_inductor_reduce_scatter_coalesced(self): @@ -1422,7 +1447,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar0, y, other, ar1 - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) compiled = torch.compile(func) code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs()) @@ -1449,6 +1474,7 @@ def func(inp, *, tag, ranks, group_size): correct = func(inputs, **self.get_world_trs()) assert same(out, correct), f"{out} va {correct}" + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") def test_reorder_peak_memory(self): """ @@ -1470,7 +1496,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar0, y, other, ar1 - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) # get stats directly from the internal helper without affecting the real pass's signature node_stats: Optional[dict[BaseSchedulerNode, ReorderInfo]] = None @@ -1642,10 +1668,10 @@ def func(x, w, ag_0, ag_1, *, tag, ranks, group_size): return y, ag_0_out, ag_1_out - x = torch.ones(4, 384, device="cuda", dtype=torch.float32) - w = torch.ones(384, 512, device="cuda", dtype=torch.float32) - ag_0 = torch.ones(384, 512, device="cuda", dtype=torch.float32) - ag_1 = torch.ones(384, 512, device="cuda", dtype=torch.float32) + x = torch.ones(4, 384, device=self.device, dtype=torch.float32) + w = torch.ones(384, 512, device=self.device, dtype=torch.float32) + ag_0 = torch.ones(384, 512, device=self.device, dtype=torch.float32) + ag_1 = torch.ones(384, 512, device=self.device, dtype=torch.float32) inputs = [x, w, ag_0, ag_1] with torch._inductor.config.patch( @@ -1810,12 +1836,12 @@ def func(x, w, ag_0, ag_1, ag_2, ag_3, *, tag, ranks, group_size): rs_3_out, ) - x = torch.ones(4, 384, device="cuda", dtype=torch.float32) - w = torch.ones(384, 512, device="cuda", dtype=torch.float32) - ag_0 = torch.ones(1024, 512, device="cuda", dtype=torch.float32) - ag_1 = torch.ones(512, 1024, device="cuda", dtype=torch.float32) - ag_2 = torch.ones(1024, 512, device="cuda", dtype=torch.float32) - ag_3 = torch.ones(512, 1024, device="cuda", dtype=torch.float32) + x = torch.ones(4, 384, device=self.device, dtype=torch.float32) + w = torch.ones(384, 512, device=self.device, dtype=torch.float32) + ag_0 = torch.ones(1024, 512, device=self.device, dtype=torch.float32) + ag_1 = torch.ones(512, 1024, device=self.device, dtype=torch.float32) + ag_2 = torch.ones(1024, 512, device=self.device, dtype=torch.float32) + ag_3 = torch.ones(512, 1024, device=self.device, dtype=torch.float32) inputs = [x, w, ag_0, ag_1, ag_2, ag_3] # get stats directly from the internal helper without affecting the real pass's signature @@ -1917,6 +1943,7 @@ def _reorder_communication_preserving_peak_memory( node_stat1 = next(it) self.assertTrue("collective ordering" in node_stat1.limiting_factor) + @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") def test_reorder_respects_wait_dep(self): """ @@ -1939,7 +1966,7 @@ def func(inp, *, tag, ranks, 
group_size): # ensure other is not incorrectly aliasing ar's buffer return ag_1_wait - inputs = torch.ones(4, 4, device="cuda") + inputs = torch.ones(4, 4, device=self.device) # get stats directly from the internal helper without affecting the real pass's signature node_stats: Optional[dict[BaseSchedulerNode, ReorderInfo]] = None @@ -1988,7 +2015,7 @@ def _reorder_communication_preserving_peak_memory( self.assertEqual(stats.moves, 0) -@requires_nccl() +@requires_accelerator_dist_backend(["nccl", "xccl"]) class TestSyncDecisionCrossRanks(MultiProcessTestCase): def setUp(self) -> None: super().setUp() @@ -2004,16 +2031,21 @@ def ranks(self) -> list[int]: @property def device(self) -> torch.device: - return torch.device(f"cuda:{self.rank}") + device_type = torch.accelerator.current_accelerator().type + return torch.device(f"{device_type}:{self.rank}") def _init_process_group(self) -> None: torch._inductor.config.triton.store_cubin = True torch._inductor.config.debug = True - torch.cuda.set_device(self.device) + torch.get_device_module(self.device).set_device(self.device) store = torch.distributed.FileStore(self.file_name, self.world_size) + backend = c10d.get_default_backend_for_device( + torch.accelerator.current_accelerator().type + ) + torch.distributed.init_process_group( - backend="nccl", + backend=backend, world_size=self.world_size, rank=self.rank, store=store, diff --git a/test/distributed/test_store.py b/test/distributed/test_store.py index 870805eec75e8..e557a48359623 100644 --- a/test/distributed/test_store.py +++ b/test/distributed/test_store.py @@ -54,6 +54,8 @@ torch.backends.cuda.matmul.allow_tf32 = False +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + def gpus_for_rank(world_size): """Multigpu tests are designed to simulate the multi nodes with multi @@ -61,8 +63,8 @@ def gpus_for_rank(world_size): On a single node, all visible GPUs are evenly divided to subsets, each process only uses a subset. """ - visible_devices = list(range(torch.cuda.device_count())) - gpus_per_process = torch.cuda.device_count() // world_size + visible_devices = list(range(torch.accelerator.device_count())) + gpus_per_process = torch.accelerator.device_count() // world_size gpus_for_rank = [] for rank in range(world_size): gpus_for_rank.append( @@ -1174,8 +1176,8 @@ def listen() -> None: if __name__ == "__main__": - assert not torch.cuda._initialized, ( - "test_distributed must not have initialized CUDA context on main process" - ) - + if device_type != "cpu": + assert not torch.get_device_module()._initialized, ( + "test_distributed must not have initialized {device_type} context on main process" + ) run_tests() diff --git a/test/distributions/test_distributions.py b/test/distributions/test_distributions.py index 7cb8cc678136f..aaae775f191cf 100644 --- a/test/distributions/test_distributions.py +++ b/test/distributions/test_distributions.py @@ -115,10 +115,13 @@ set_default_dtype, set_rng_seed, skipIfTorchDynamo, + TEST_XPU, TestCase, ) +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + # load_tests from torch.testing._internal.common_utils is used to automatically filter tests for # sharding on sandcastle. 
This line silences flake warnings load_tests = load_tests @@ -1788,18 +1791,21 @@ def test_negative_binomial_log_prob_vectorized_count(self): ).logpmf(sample) self.assertEqual(log_prob, expected, atol=1e-4, rtol=0) - @unittest.skipIf(not TEST_CUDA, "CUDA not found") + @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "CUDA and XPU not found") def test_zero_excluded_binomial(self): vals = Binomial( - total_count=torch.tensor(1.0).cuda(), probs=torch.tensor(0.9).cuda() + total_count=torch.tensor(1.0).to(device_type), + probs=torch.tensor(0.9).to(device_type), ).sample(torch.Size((100000000,))) self.assertTrue((vals >= 0).all()) vals = Binomial( - total_count=torch.tensor(1.0).cuda(), probs=torch.tensor(0.1).cuda() + total_count=torch.tensor(1.0).to(device_type), + probs=torch.tensor(0.1).to(device_type), ).sample(torch.Size((100000000,))) self.assertTrue((vals < 2).all()) vals = Binomial( - total_count=torch.tensor(1.0).cuda(), probs=torch.tensor(0.5).cuda() + total_count=torch.tensor(1.0).to(device_type), + probs=torch.tensor(0.5).to(device_type), ).sample(torch.Size((10000,))) # vals should be roughly half zeroes, half ones assert (vals == 0.0).sum() > 4000 @@ -2050,15 +2056,15 @@ def test_poisson_sample(self): ) torch.set_default_dtype(saved_dtype) - @unittest.skipIf(not TEST_CUDA, "CUDA not found") + @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "CUDA and XPU not found") @unittest.skipIf(not TEST_NUMPY, "Numpy not found") def test_poisson_gpu_sample(self): set_rng_seed(1) for rate in [0.12, 0.9, 4.0]: self._check_sampler_discrete( - Poisson(torch.tensor([rate]).cuda()), + Poisson(torch.tensor([rate]).to(device_type)), scipy.stats.poisson(rate), - f"Poisson(lambda={rate}, cuda)", + f"Poisson(lambda={rate}, {device_type})", failure_rate=1e-3, ) @@ -3490,13 +3496,13 @@ def ref_log_prob(idx, x, log_prob): self._check_log_prob(Gamma(alpha, beta), ref_log_prob) - @unittest.skipIf(not TEST_CUDA, "CUDA not found") + @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "CUDA and XPU not found") @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_gamma_gpu_shape(self): - alpha = torch.randn(2, 3).cuda().exp().requires_grad_() - beta = torch.randn(2, 3).cuda().exp().requires_grad_() - alpha_1d = torch.randn(1).cuda().exp().requires_grad_() - beta_1d = torch.randn(1).cuda().exp().requires_grad_() + alpha = torch.randn(2, 3).to(device_type).exp().requires_grad_() + beta = torch.randn(2, 3).to(device_type).exp().requires_grad_() + alpha_1d = torch.randn(1).to(device_type).exp().requires_grad_() + beta_1d = torch.randn(1).to(device_type).exp().requires_grad_() self.assertEqual(Gamma(alpha, beta).sample().size(), (2, 3)) self.assertEqual(Gamma(alpha, beta).sample((5,)).size(), (5, 2, 3)) self.assertEqual(Gamma(alpha_1d, beta_1d).sample((1,)).size(), (1, 1)) @@ -3527,7 +3533,10 @@ def test_gamma_sample(self): def test_gamma_gpu_sample(self): set_rng_seed(0) for alpha, beta in product([0.1, 1.0, 5.0], [0.1, 1.0, 10.0]): - a, b = torch.tensor([alpha]).cuda(), torch.tensor([beta]).cuda() + a, b = ( + torch.tensor([alpha]).to(device_type), + torch.tensor([beta]).to(device_type), + ) self._check_sampler_sampler( Gamma(a, b), scipy.stats.gamma(alpha, scale=1.0 / beta), @@ -3973,11 +3982,11 @@ def test_beta_underflow(self): self.assertEqual(frac_zeros, 0.5, atol=0.05, rtol=0) self.assertEqual(frac_ones, 0.5, atol=0.05, rtol=0) - @unittest.skipIf(not TEST_CUDA, "CUDA not found") + @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "CUDA and XPU not found") def test_beta_underflow_gpu(self): set_rng_seed(1) 
num_samples = 50000 - conc = torch.tensor(1e-2, dtype=torch.float64).cuda() + conc = torch.tensor(1e-2, dtype=torch.float64).to(device_type) beta_samples = Beta(conc, conc).sample([num_samples]) self.assertEqual((beta_samples == 0).sum(), 0) self.assertEqual((beta_samples == 1).sum(), 0) diff --git a/test/inductor/test_snode_runtime.py b/test/inductor/test_snode_runtime.py index c57393d993eab..cee78592153db 100644 --- a/test/inductor/test_snode_runtime.py +++ b/test/inductor/test_snode_runtime.py @@ -258,8 +258,6 @@ def _verify_runtime_estimation(self, fn, inps): finally: dist.destroy_process_group() - # lack of profiler on XPU - @expectedFailureXPU def test_legacy_all_reduce(self): def fn(x): r = c10d.all_reduce(x, "sum", "", self.RANKS, self.WORLD_SIZE) @@ -268,8 +266,6 @@ def fn(x): inp = T(10, 10) self._verify_runtime_estimation(fn, (inp,)) - # lack of profiler on XPU - @expectedFailureXPU def test_legacy_all_reduce_coalesced(self): def fn(x): rs = c10d.all_reduce_coalesced(x, "sum", "", self.RANKS, self.WORLD_SIZE) @@ -278,8 +274,6 @@ def fn(x): inp = [T(10, 10), T(15, 15)] self._verify_runtime_estimation(fn, (inp,)) - # lack of profiler on XPU - @expectedFailureXPU def test_legacy_all_gather_into_tensor_coalesced(self): def fn(x): rs = c10d.all_gather_into_tensor_coalesced( @@ -293,8 +287,6 @@ def fn(x): inp = [T(10, 10), T(15, 15)] self._verify_runtime_estimation(fn, (inp,)) - # lack of profiler on XPU - @expectedFailureXPU def test_all_reduce(self): def fn(x): r = _c10d.all_reduce(x, "sum", "0") @@ -303,8 +295,6 @@ def fn(x): inp = T(10, 10) self._verify_runtime_estimation(fn, (inp,)) - # lack of profiler on XPU - @expectedFailureXPU def test_all_reduce_coalesced(self): def fn(x): rs = _c10d.all_reduce_coalesced(x, "sum", "0") @@ -313,8 +303,6 @@ def fn(x): inp = [T(10, 10), T(15, 15)] self._verify_runtime_estimation(fn, (inp,)) - # lack of profiler on XPU - @expectedFailureXPU def test_all_gather_into_tensor(self): def fn(x): rs = _c10d.all_gather_into_tensor( @@ -327,8 +315,6 @@ def fn(x): inp = T(10, 10) self._verify_runtime_estimation(fn, (inp,)) - # lack of profiler on XPU - @expectedFailureXPU def test_all_gather_into_tensor_coalesced(self): def fn(x): rs = _c10d.all_gather_into_tensor_coalesced( @@ -341,8 +327,6 @@ def fn(x): inp = [T(10, 10), T(15, 15)] self._verify_runtime_estimation(fn, (inp,)) - # lack of profiler on XPU - @expectedFailureXPU def test_reduce_scatter_tensor(self): def fn(x): rs = _c10d.reduce_scatter_tensor( @@ -356,8 +340,6 @@ def fn(x): inp = T(self.WORLD_SIZE, 10) self._verify_runtime_estimation(fn, (inp,)) - # lack of profiler on XPU - @expectedFailureXPU def test_reduce_scatter_tensor_coalesced(self): def fn(x): rs = _c10d.reduce_scatter_tensor_coalesced( diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 14790e5dba8af..2f60cbe13abcf 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -350,10 +350,12 @@ def register_backend( # assume default devices "cpu" and "cuda", but warn warnings.warn( f"Device capability of {name} unspecified, assuming `cpu` and " - "`cuda`. Please specify it via the `devices` argument of " + "`cuda` or `xpu`. Please specify it via the `devices` argument of " "`register_backend`." ) - Backend.backend_capability[name.lower()] = ["cpu", "cuda"] + Backend.backend_capability[name.lower()] = ( + ["cpu", "cuda", "xpu"] if torch.xpu.is_available() else ["cpu", "cuda"] + ) elif isinstance(devices, str): # Single device string specified. 
Simply convert to list. Backend.backend_capability[name.lower()] = [devices] diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py index c1f75697fe889..0e74d9049b94b 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py @@ -338,26 +338,17 @@ def requires_gloo(): def requires_nccl_version(version, msg): - if TEST_CUDA: - if not c10d.is_nccl_available(): - return skip_but_pass_in_sandcastle( - "c10d was not compiled with the NCCL backend", - ) - else: - return skip_but_pass_in_sandcastle_if( - torch.cuda.nccl.version() < version, - f"Requires NCCL version greater than or equal to: {version}, found: {torch.cuda.nccl.version()}, reason: {msg}", - ) + if not TEST_CUDA: + return lambda f: f + if not c10d.is_nccl_available(): + return skip_but_pass_in_sandcastle( + "c10d was not compiled with the NCCL backend", + ) else: - - def decorator(func): - @wraps(func) - def wrapper(*args, **kwargs): - return func(*args, **kwargs) - - return wrapper - - return decorator + return skip_but_pass_in_sandcastle_if( + torch.cuda.nccl.version() < version, + f"Requires NCCL version greater than or equal to: {version}, found: {torch.cuda.nccl.version()}, reason: {msg}", + ) def requires_nccl(): diff --git a/torch/testing/_internal/distributed/fake_pg.py b/torch/testing/_internal/distributed/fake_pg.py index 0a2814c246459..e160f2fe50611 100644 --- a/torch/testing/_internal/distributed/fake_pg.py +++ b/torch/testing/_internal/distributed/fake_pg.py @@ -28,5 +28,5 @@ def _create_fake_pg(common_opts, backend_opts): dist.Backend.register_backend( - "fake", _create_fake_pg, extended_api=True, devices=["cpu", "cuda", "hpu"] + "fake", _create_fake_pg, extended_api=True, devices=["cpu", "cuda", "hpu", "xpu"] ) From f68f76d8c714c8b38429f8a1b1d5b1877aab6792 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 10 Sep 2025 21:21:10 -0700 Subject: [PATCH 090/693] Remove logger.debug statements in DTensor dispatch (#161596) These seem to have been costing us 5-10 usec per detach (out of ~~95 usec total). If they need to ship let's talk about requirements and how we can make this more efficient given that we would prefer if an entire DTensor op could finish in 10 usec. Differential Revision: [D81530106](https://our.internmc.facebook.com/intern/diff/D81530106) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161596 Approved by: https://github.com/ezyang, https://github.com/Skylion007 ghstack dependencies: #161591, #161595, #161633, #161634, #161692, #162219, #162220, #162218 --- test/distributed/tensor/test_dtensor.py | 33 ------------------------- torch/distributed/tensor/_dispatch.py | 2 -- 2 files changed, 35 deletions(-) diff --git a/test/distributed/tensor/test_dtensor.py b/test/distributed/tensor/test_dtensor.py index f5ddb1a4222c6..083f6d459c7e0 100644 --- a/test/distributed/tensor/test_dtensor.py +++ b/test/distributed/tensor/test_dtensor.py @@ -1,7 +1,6 @@ # Copyright (c) Meta Platforms, Inc. 
and affiliates # Owner(s): ["oncall: distributed"] -import os import pathlib import tempfile import unittest @@ -33,7 +32,6 @@ DTensorTestBase, with_comms, ) -from torch.testing._internal.logging_utils import LoggingTestCase c10d_functional = torch.ops.c10d_functional @@ -1012,36 +1010,5 @@ def test_split_tensor_1D(self) -> None: assert_array_equal(expected_is_tensor_empty, is_tensor_empty) -class DTensorLogTest(LoggingTestCase): - def test_dtensor_log(self): - if not torch.distributed.is_available() or not torch.cuda.is_available(): - return - - env = dict(os.environ) - env["TORCH_LOGS"] = "+dtensor" - env["RANK"] = "0" - env["WORLD_SIZE"] = "1" - env["MASTER_PORT"] = "12345" - env["MASTER_ADDR"] = "localhost" - - _, stderr = self.run_process_no_exception( - """\ -import logging -import torch -from torch.distributed.device_mesh import init_device_mesh -from torch.distributed.tensor import distribute_tensor, Shard - -mesh = init_device_mesh("cuda", (1,), mesh_dim_names=("dp",)) -placements = [Shard(0)] -tensor = torch.randn(12, 8, 8) -dtensor = distribute_tensor(tensor, mesh, placements) -dtensor.max() -""", - env=env, - ) - self.assertIn("_dispatch.py", stderr.decode("utf-8")) - self.assertIn("redistribute=False", stderr.decode("utf-8")) - - if __name__ == "__main__": run_tests() diff --git a/torch/distributed/tensor/_dispatch.py b/torch/distributed/tensor/_dispatch.py index 7ac7801b50bca..2f0a77d4d27d3 100644 --- a/torch/distributed/tensor/_dispatch.py +++ b/torch/distributed/tensor/_dispatch.py @@ -150,7 +150,6 @@ def dispatch( # extract local tensor and sharding infos to a OpInfo op_info = self.unwrap_to_op_info(op_call, args, kwargs) - logger.debug("Dispatching op_call: %s", op_info.schema) try: self.sharding_propagator.propagate(op_info) @@ -171,7 +170,6 @@ def dispatch( ) from e output_sharding = op_info.output_sharding - logger.debug("output_sharding for %s: %s", op_call, output_sharding) assert output_sharding is not None, "output sharding should not be None" mesh = op_info.compute_mesh From 1274297e0620892d49e03cb59cafec8e8966f2a7 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Wed, 10 Sep 2025 21:21:11 -0700 Subject: [PATCH 091/693] Remove __torch_dispatch__ check in THPVariable_make_dtensor (#162337) We control DTensor, so we can just guarantee there isn't a programming error with __torch_dispatch__. (The guard is already less-than-perfect; see the note that the deleted comment refers to.) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162337 Approved by: https://github.com/Skylion007 ghstack dependencies: #161591, #161595, #161633, #161634, #161692, #162219, #162220, #162218, #161596 --- torch/csrc/autograd/python_variable.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/torch/csrc/autograd/python_variable.cpp b/torch/csrc/autograd/python_variable.cpp index 712719304ad63..7ec4bf28e1604 100644 --- a/torch/csrc/autograd/python_variable.cpp +++ b/torch/csrc/autograd/python_variable.cpp @@ -808,14 +808,18 @@ static PyObject* THPVariable_make_dtensor( "cls must be a type (got ", Py_TYPE(cls)->tp_name, ")"); - // See note about the __torch_dispatch__ check in - // THPVariable_make_wrapper_subclass above. + +#ifndef NDEBUG + // This is specifically for making a DTensor, which we know defines + // __torch_dispatch__. Check anyway in debug builds in case somebody + // removes it. 
py::object attr = PyObject_FastGetAttrString(cls, "__torch_dispatch__"); TORCH_CHECK_TYPE( attr.ptr() != nullptr && attr.ptr() != torch::disabled_torch_dispatch_impl(), ((PyTypeObject*)cls)->tp_name, " must define __torch_dispatch__"); +#endif const auto& local_tensor = r.tensor(3); const auto options = TensorOptions() From 6b9b7ce6fe242377a2060a4b3a70514a02617be1 Mon Sep 17 00:00:00 2001 From: "Sun, Jiayi" Date: Wed, 10 Sep 2025 10:28:39 +0000 Subject: [PATCH 092/693] fix torch.sparse.log_softmax on CPU (#161959) Fix https://github.com/pytorch/pytorch/issues/152293. **Example:** ``` import torch from torch.sparse import log_softmax as sparse_log_softmax def test_bug(): a = torch.rand(4, 3) b = a - 10000000.0 b_sparse = b.to_sparse() cpu_out_sparse = sparse_log_softmax(b_sparse, dim=1).to_dense() print('cpu_out_sparse =', cpu_out_sparse) b_sparse_double = b.double().to_sparse() cpu_out_sparse_double = sparse_log_softmax(b_sparse_double, dim=1).to_dense() print('cpu_out_sparse_double =', cpu_out_sparse_double) if __name__ == '__main__': test_bug() ``` **Output:** - before ``` cpu_out_sparse = tensor([[-2., -1., -2.], [-1., -1., -1.], [-1., -2., -2.], [-1., -1., -2.]]) cpu_out_sparse_double = tensor([[-1.5514, -0.5514, -1.5514], [-1.0986, -1.0986, -1.0986], [-0.5514, -1.5514, -1.5514], [-0.8620, -0.8620, -1.8620]], dtype=torch.float64) ``` - after ``` cpu_out_sparse = tensor([[-0.8620, -1.8620, -0.8620], [-1.0986, -1.0986, -1.0986], [-1.8620, -0.8620, -0.8620], [-1.0986, -1.0986, -1.0986]]) cpu_out_sparse_double = tensor([[-0.8620, -1.8620, -0.8620], [-1.0986, -1.0986, -1.0986], [-1.8620, -0.8620, -0.8620], [-1.0986, -1.0986, -1.0986]], dtype=torch.float64) ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/161959 Approved by: https://github.com/Skylion007, https://github.com/malfet, https://github.com/mingfeima --- aten/src/ATen/native/sparse/SoftMax.cpp | 8 +++++--- test/test_sparse.py | 9 +++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/aten/src/ATen/native/sparse/SoftMax.cpp b/aten/src/ATen/native/sparse/SoftMax.cpp index 24fc7be1697be..a321074f60ea1 100644 --- a/aten/src/ATen/native/sparse/SoftMax.cpp +++ b/aten/src/ATen/native/sparse/SoftMax.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -295,6 +296,7 @@ void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t di to exp functions as well as reuse of softmax implementation for log_softmax. 
*/ + using accscalar_t = at::acc_type; auto sparse_dim = input.sparse_dim(); auto indices = input._indices().contiguous(); auto values = input._values().contiguous(); @@ -340,14 +342,14 @@ void cpu_sparse_coo_softmax(Tensor output, const Tensor& input, const int64_t di continue; /* Prepare scratch space */ - std::vector mx_row(nvalues, -std::numeric_limits::infinity()); - std::vector exp_sums_row(nvalues, 0); + std::vector mx_row(nvalues, -std::numeric_limits::infinity()); + std::vector exp_sums_row(nvalues, 0); /* Compute mx */ for (int64_t i : pool_indices) { auto values_row = values_accessor[i]; for (const auto j : c10::irange(nvalues)) { - mx_row[j] = std::max(mx_row[j], values_row[j]); + mx_row[j] = std::max(mx_row[j], accscalar_t(values_row[j])); } } diff --git a/test/test_sparse.py b/test/test_sparse.py index 727c3a5f6bcdd..3568c6bc7d194 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -3694,6 +3694,15 @@ def test_log_softmax_zero_nnz(self, device, dtype): self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 1, device, dtype) self._check_zero_nnz_softmax_op(torch.sparse.log_softmax, 10, device, dtype) + @dtypes(torch.float) + @expectedFailureMPS + def test_log_softmax_float(self, device, dtype): + x = (torch.rand(4, 3, dtype=dtype, device=device) - 10000000.0).to_sparse() + out = torch.sparse.log_softmax(x, dim=1).to_dense() + x_double = x.double() + out_double = torch.sparse.log_softmax(x_double, dim=1).to_dense() + self.assertEqual(out, out_double.to(dtype=dtype)) + # TODO: Check after why ROCm's cusparseXcsrgemm2Nnz function doesn't return the same nnz value as CUDA @coalescedonoff @dtypes(*floating_and_complex_types()) From 22df9332da8ab151f5db0b987e85ac5b4d594189 Mon Sep 17 00:00:00 2001 From: Lucy Qiu Date: Thu, 11 Sep 2025 07:59:08 +0000 Subject: [PATCH 093/693] [serialization] Add pte file to archive (#162520) Summary: Add _package_executorch_files to archive apis. Allow us to package a PTE file into the archive. I don't think there's a use-case to have more than one PTE file at the moment, but left it as `EXECUTORCH_FILES` just in case. Test Plan: Tested in D81992612 Rollback Plan: Differential Revision: D81977483 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162520 Approved by: https://github.com/angelayi --- torch/_C/_export/pt2_archive_constants.pyi | 1 + torch/csrc/export/pt2_archive_constants.h | 2 ++ torch/export/pt2_archive/_package.py | 16 ++++++++++++++++ torch/export/pt2_archive/constants.py | 1 + 4 files changed, 20 insertions(+) diff --git a/torch/_C/_export/pt2_archive_constants.pyi b/torch/_C/_export/pt2_archive_constants.pyi index ce225f0f1880b..f7a92ddd0c961 100644 --- a/torch/_C/_export/pt2_archive_constants.pyi +++ b/torch/_C/_export/pt2_archive_constants.pyi @@ -18,6 +18,7 @@ TENSOR_CONSTANT_FILENAME_PREFIX: str = ... CUSTOM_OBJ_FILENAME_PREFIX: str = ... SAMPLE_INPUTS_DIR: str = ... SAMPLE_INPUTS_FILENAME_FORMAT: str = ... +EXECUTORCH_DIR: str = ... EXTRA_DIR: str = ... MODULE_INFO_PATH: str = ... XL_MODEL_WEIGHTS_DIR: str = ... 
diff --git a/torch/csrc/export/pt2_archive_constants.h b/torch/csrc/export/pt2_archive_constants.h index 1583f759acb65..8e4e2653265e3 100644 --- a/torch/csrc/export/pt2_archive_constants.h +++ b/torch/csrc/export/pt2_archive_constants.h @@ -47,6 +47,8 @@ namespace torch::_export::archive_spec { DO(SAMPLE_INPUTS_DIR, "data/sample_inputs/") \ DO(SAMPLE_INPUTS_FILENAME_FORMAT, \ "data/sample_inputs/{}.pt") /* {model_name} */ \ + /* ExecuTorch artifacts, including PTE files */ \ + DO(EXECUTORCH_DIR, "data/executorch/") \ /* extra folder */ \ DO(EXTRA_DIR, "extra/") \ DO(MODULE_INFO_PATH, "extra/module_info.json") \ diff --git a/torch/export/pt2_archive/_package.py b/torch/export/pt2_archive/_package.py index db147e2fb8094..eab67a092e1cd 100644 --- a/torch/export/pt2_archive/_package.py +++ b/torch/export/pt2_archive/_package.py @@ -43,6 +43,7 @@ CONSTANTS_CONFIG_FILENAME_FORMAT, CONSTANTS_DIR, CUSTOM_OBJ_FILENAME_PREFIX, + EXECUTORCH_DIR, EXTRA_DIR, MODELS_DIR, MODELS_FILENAME_FORMAT, @@ -529,6 +530,16 @@ def _package_extra_files( archive_writer.write_string(f"{EXTRA_DIR}{extra_file_name}", content) +def _package_executorch_files( + archive_writer: PT2ArchiveWriter, executorch_files: Optional[dict[str, bytes]] +) -> None: + if executorch_files is None: + return + + for file_name, content in executorch_files.items(): + archive_writer.write_bytes(f"{EXECUTORCH_DIR}{file_name}", content) + + def package_pt2( f: FileLike, *, @@ -539,6 +550,7 @@ def package_pt2( extra_files: Optional[dict[str, Any]] = None, opset_version: Optional[dict[str, int]] = None, pickle_protocol: int = DEFAULT_PICKLE_PROTOCOL, + executorch_files: Optional[dict[str, bytes]] = None, ) -> FileLike: r""" Saves the artifacts to a PT2Archive format. The artifact can then be loaded @@ -569,6 +581,9 @@ def package_pt2( pickle_protocol: can be specified to override the default protocol + executorch_files (Optional[dict[str, bytes]]): Optional executorch + artifacts to save. + """ assert not ( exported_programs is None and aoti_files is None and extra_files is None @@ -602,6 +617,7 @@ def package_pt2( pickle_protocol=pickle_protocol, ) _package_extra_files(archive_writer, extra_files) + _package_executorch_files(archive_writer, executorch_files) if isinstance(f, (io.IOBase, IO)): f.seek(0) diff --git a/torch/export/pt2_archive/constants.py b/torch/export/pt2_archive/constants.py index 772c3c0708412..4b05e257b8f3d 100644 --- a/torch/export/pt2_archive/constants.py +++ b/torch/export/pt2_archive/constants.py @@ -13,6 +13,7 @@ pt2_archive_constants.CONSTANTS_CONFIG_FILENAME_FORMAT ) CUSTOM_OBJ_FILENAME_PREFIX: str = pt2_archive_constants.CUSTOM_OBJ_FILENAME_PREFIX +EXECUTORCH_DIR: str = pt2_archive_constants.EXECUTORCH_DIR EXTRA_DIR: str = pt2_archive_constants.EXTRA_DIR MODELS_DIR: str = pt2_archive_constants.MODELS_DIR MODELS_FILENAME_FORMAT: str = pt2_archive_constants.MODELS_FILENAME_FORMAT From afdd4247a2251b3f4c2f4b402cb625f46d6784ba Mon Sep 17 00:00:00 2001 From: Naveen Suda Date: Thu, 11 Sep 2025 07:59:19 +0000 Subject: [PATCH 094/693] [torchao][pt2e] Make prepare and convert faster by caching (#162550) Summary: D79674759 tried to fix the expensive prepare and convert steps, as `assert_and_get_unique_device` was called multiple times. This change fixes that issue by using `functools.cache` decorator. Test Plan: Verified on llm export to QNN. LLM Quantization prepare time of ~20min reduced to ~3min. 
Rollback Plan: Differential Revision: D82073679 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162550 Approved by: https://github.com/andrewor14 --- torch/ao/quantization/fx/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch/ao/quantization/fx/utils.py b/torch/ao/quantization/fx/utils.py index f8445da5fea19..6e68bfd4648e3 100644 --- a/torch/ao/quantization/fx/utils.py +++ b/torch/ao/quantization/fx/utils.py @@ -1,5 +1,6 @@ # mypy: allow-untyped-defs import copy +import functools import operator import warnings from collections import namedtuple @@ -245,6 +246,7 @@ def load_arg(a): # TODO: delete +@functools.cache def assert_and_get_unique_device(module: torch.nn.Module) -> Any: """ Returns the unique device for a module, or None if no device is found. From 7c39b2ecbe635dd10608ce4c2921b5c7d922e80e Mon Sep 17 00:00:00 2001 From: "Sun,Jiabin" <41134681+sunjiabin17@users.noreply.github.com> Date: Thu, 11 Sep 2025 10:04:23 +0000 Subject: [PATCH 095/693] use torch.accelerator and device_module instead of cuda to make DataParallel more device agnostic. (#162573) use torch.accelerator and `_get_device_module` instead of cuda to make DataParallel more device agnostic. Fixes #162152 recently, I've done some works to support my own privateuse1 backend in DataParallel module, but I found some cuda related APIs exist in parallel_apply.py file, that makes me have to monkey patch DataParallel module to support DP on my own backend. so I make some small changes to replace cuda.xxx to accelerator.xxx, and acquire device module by `_get_device_module`. this is my first time to contribute to pytorch, please let me know if there is any problem about the change. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162573 Approved by: https://github.com/ezyang, https://github.com/guangyey Co-authored-by: Yu, Guangye <106960996+guangyey@users.noreply.github.com> Co-authored-by: Edward Z. Yang --- torch/nn/parallel/parallel_apply.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/torch/nn/parallel/parallel_apply.py b/torch/nn/parallel/parallel_apply.py index 4d66a7a71d895..aa8db823e1185 100644 --- a/torch/nn/parallel/parallel_apply.py +++ b/torch/nn/parallel/parallel_apply.py @@ -58,7 +58,9 @@ def parallel_apply( else: devices = [None] * len(modules) devices = [_get_device_index(x, True) for x in devices] - streams = [torch.cuda.current_stream(x) for x in devices] + streams = [torch.accelerator.current_stream(x) for x in devices] + assert torch.accelerator.is_available(), "No available accelerator found." 
+ device_type = torch.accelerator.current_accelerator().type # type: ignore[union-attr] lock = threading.Lock() results = {} grad_enabled, autocast_enabled = ( @@ -72,7 +74,7 @@ def _worker( input: Any, kwargs: dict[str, Any], device: Optional[Union[int, torch.device]] = None, - stream: Optional[torch.cuda.Stream] = None, + stream: Optional[torch.Stream] = None, ) -> None: torch.set_grad_enabled(grad_enabled) if device is None: @@ -85,13 +87,15 @@ def _worker( ) return device = t.get_device() + if isinstance(device, torch.device): + device = device.index if stream is None: - stream = torch.cuda.current_stream(device) + stream = torch.accelerator.current_stream(device) try: with ( - torch.cuda.device(device), - torch.cuda.stream(stream), - torch.amp.autocast("cuda", enabled=autocast_enabled), + torch.accelerator.device_index(device), + stream, + torch.amp.autocast(device_type, enabled=autocast_enabled), ): # this also avoids accidental slicing of `input` if it is a Tensor if not isinstance(input, (list, tuple)): From 1e710552c1d6f7aceaf778e27e0e8eb04cc73892 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Thu, 11 Sep 2025 14:10:51 +0000 Subject: [PATCH 096/693] [ROCm][CI] benchmark must patch fbgemm_gpu with tbb dep (#162649) fbgemm adds tbb as a dep only for rocm to avoid missing tbb symbols at import. But the way it was done was in setup.py to add the linker flag to CMAKE_CXX_FLAGS and it wasn't working for reasons unknown to me. But what did work was to add tbb as a dep in the cmake file. [We have a PR against upstream fbgemm](https://github.com/pytorch/FBGEMM/pull/4859) for that. Meanwhile, a much smaller patch is applied here in this PR until the fbgemm rocm ci commit hash is moved forward to include the tbb patch from upstream. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162649 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily --- .ci/pytorch/common_utils.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh index 6d79a4517edf6..ff9d8ad41cc92 100644 --- a/.ci/pytorch/common_utils.sh +++ b/.ci/pytorch/common_utils.sh @@ -258,11 +258,19 @@ function install_torchrec_and_fbgemm() { git clone --recursive https://github.com/pytorch/fbgemm pushd fbgemm/fbgemm_gpu git checkout "${fbgemm_commit}" --recurse-submodules - python setup.py bdist_wheel \ - --build-variant=rocm \ - -DHIP_ROOT_DIR="${ROCM_PATH}" \ - -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \ - -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA" + # until the fbgemm_commit includes the tbb patch + patch <<'EOF' +--- a/FbgemmGpu.cmake ++++ b/FbgemmGpu.cmake +@@ -184,5 +184,6 @@ gpu_cpp_library( + fbgemm_gpu_tbe_cache + fbgemm_gpu_tbe_optimizers + fbgemm_gpu_tbe_utils ++ tbb + DESTINATION + fbgemm_gpu) +EOF + python setup.py bdist_wheel --build-variant=rocm popd # Save the wheel before cleaning up From c3f30eca9e65f0dda2dbe05680281d1185cc3b7a Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Thu, 11 Sep 2025 14:36:07 +0000 Subject: [PATCH 097/693] Remove tests-to-include from rocm-mi300 workflow (#162721) Accidentally introduced by https://github.com/pytorch/pytorch/pull/162288 (was meant to be a temporary change) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162721 Approved by: https://github.com/jeffdaily --- .github/workflows/rocm-mi300.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/rocm-mi300.yml b/.github/workflows/rocm-mi300.yml 
index 8ffd58cb9811c..7e3ba43bf9845 100644 --- a/.github/workflows/rocm-mi300.yml +++ b/.github/workflows/rocm-mi300.yml @@ -70,5 +70,4 @@ jobs: build-environment: linux-noble-rocm-py3.12-mi300 docker-image: ${{ needs.linux-noble-rocm-py3_12-build.outputs.docker-image }} test-matrix: ${{ needs.linux-noble-rocm-py3_12-build.outputs.test-matrix }} - tests-to-include: "inductor/test_ck_backend" secrets: inherit From c924c675d068de6e9a5ef5ad6e0cced1dd50e297 Mon Sep 17 00:00:00 2001 From: Tugsbayasgalan Manlaibaatar Date: Wed, 10 Sep 2025 20:21:06 -0700 Subject: [PATCH 098/693] Fix persistent buffer bug (#162190) For non-persistent buffers, we should properly register them. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162190 Approved by: https://github.com/zhxchen17 --- test/export/test_export.py | 2 -- torch/_dynamo/functional_export.py | 2 +- torch/fx/graph_module.py | 6 ------ 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/test/export/test_export.py b/test/export/test_export.py index c3bf82c8fe906..7efab79cb08f4 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -12073,8 +12073,6 @@ def test(ep, swap=None): test(export(M(), inp)) - # Preserving signature hook is messing with dynamo tracing - @testing.expectedFailureStrictV2 def test_unflatten_multiple_graphs_state(self): class N(torch.nn.Module): def __init__(self): diff --git a/torch/_dynamo/functional_export.py b/torch/_dynamo/functional_export.py index 037577998ac22..ba89627453849 100644 --- a/torch/_dynamo/functional_export.py +++ b/torch/_dynamo/functional_export.py @@ -61,7 +61,7 @@ def clean_name(name) -> str: # Move the parameter to the new name if hasattr(graph_module, old_target): param = torch.fx.graph_module._get_attr(graph_module, old_target) - torch.fx.graph_module._set_attr(graph_module, new_target, param) + torch.fx.graph_module._assign_attr(param, graph_module, new_target) torch.fx.graph_module._del_attr(graph_module, old_target) diff --git a/torch/fx/graph_module.py b/torch/fx/graph_module.py index e8f68d78dfcd0..4c067c0e76e4c 100644 --- a/torch/fx/graph_module.py +++ b/torch/fx/graph_module.py @@ -302,12 +302,6 @@ def _has_attr(model: torch.nn.Module, attr_name: str): return hasattr(t, field) -def _set_attr(model: torch.nn.Module, attr_name: str, value): - attr_names = attr_name.split(".") - t = _get_attr_via_attr_list(model, attr_names[:-1]) - setattr(t, attr_names[-1], value) - - def _print_readable( module, module_name, From a3a40cb741f5b2d72cd86f55d385cae44dca6c6b Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Wed, 10 Sep 2025 22:22:56 -0700 Subject: [PATCH 099/693] [dynamo][guards] Do not consturct framelocals to dict on GlobalsGuardAccessor (#162694) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162694 Approved by: https://github.com/williamwen42 --- torch/csrc/dynamo/guards.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index 44e95d6910028..ea72631d8ad40 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -5674,6 +5674,11 @@ class GlobalsGuardAccessor : public GuardAccessor { return "GlobalsGuardAccessor"; } + bool check_nopybind(FrameLocalsMapping* map, bool matches_dict_tag) override { + // Ensure that we don't construct the framelocals to dict here. 
+ return _guard_manager->check_nopybind(_globals_dict); + } + public: // cloning functions GlobalsGuardAccessor(GuardManager* guard_manager, GlobalsGuardAccessor* from) : GuardAccessor(guard_manager, from) { From a8432bcaadd6dea52a94429dced1fb4550f2f560 Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Wed, 10 Sep 2025 22:23:00 -0700 Subject: [PATCH 100/693] [dynamo][guards] Fail on an unknown framelocals to dict conversion (#162695) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162695 Approved by: https://github.com/williamwen42 ghstack dependencies: #162694 --- torch/csrc/dynamo/guards.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index ea72631d8ad40..6682e91cd08b3 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -2648,9 +2648,7 @@ class GuardAccessor { // subtree on immutable dict getitems. virtual bool check_nopybind(PyObject* obj, bool matches_dict_tag = false) = 0; virtual bool check_nopybind(FrameLocalsMapping* map, bool matches_dict_tag) { - // throw std::runtime_error("fallback to python"); - // Could fallback to running check on the Python dict (lazily constructed) - return check_nopybind((PyObject*)map->to_dict(), matches_dict_tag); + throw std::runtime_error("fallback to python"); } virtual GuardDebugInfo check_verbose_nopybind(PyObject* obj) = 0; virtual std::string repr() const = 0; From 9f783e172d7fadc8ddfbcc6fd4b1adabb74934e0 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 11 Sep 2025 15:25:24 +0000 Subject: [PATCH 101/693] Revert "Build and Install Arm Compute Library in manylinux docker image (#159737)" This reverts commit 582d278983b28a91ac0cedd035183f2495bb6887. Reverted https://github.com/pytorch/pytorch/pull/159737 on behalf of https://github.com/atalman due to Sorry reverting this broke linux aarch64 CUDA nightlies [pytorch/pytorch/actions/runs/17637486681/job/50146967503](https://github.com/pytorch/pytorch/actions/runs/17637486681/job/50146967503) ([comment](https://github.com/pytorch/pytorch/pull/159737#issuecomment-3281398272)) --- .ci/aarch64_linux/aarch64_wheel_ci_build.py | 55 ++++++++++++++++++-- .ci/aarch64_linux/build_aarch64_wheel.py | 48 +++++++++++++---- .ci/docker/common/install_acl.sh | 27 +++------- .ci/docker/common/install_openblas.sh | 12 ++--- .ci/docker/manywheel/Dockerfile_2_28_aarch64 | 10 +--- .ci/docker/manywheel/build.sh | 5 +- .ci/pytorch/build.sh | 2 +- 7 files changed, 107 insertions(+), 52 deletions(-) mode change 100755 => 100644 .ci/docker/common/install_acl.sh mode change 100755 => 100644 .ci/docker/common/install_openblas.sh diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py index b7a895fe05869..d4afea81ac0b4 100755 --- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py +++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py @@ -13,6 +13,49 @@ def list_dir(path: str) -> list[str]: return check_output(["ls", "-1", path]).decode().split("\n") +def build_ArmComputeLibrary() -> None: + """ + Using ArmComputeLibrary for aarch64 PyTorch + """ + print("Building Arm Compute Library") + acl_build_flags = [ + "debug=0", + "neon=1", + "opencl=0", + "os=linux", + "openmp=1", + "cppthreads=0", + "arch=armv8a", + "multi_isa=1", + "fixed_format_kernels=1", + "build=native", + ] + acl_install_dir = "/acl" + acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary") + if os.path.isdir(acl_install_dir): + shutil.rmtree(acl_install_dir) + if not 
os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)): + check_call( + [ + "git", + "clone", + "https://github.com/ARM-software/ComputeLibrary.git", + "-b", + "v25.02", + "--depth", + "1", + "--shallow-submodules", + ] + ) + + check_call( + ["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags, + cwd=acl_checkout_dir, + ) + for d in ["arm_compute", "include", "utils", "support", "src", "build"]: + shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") + + def replace_tag(filename) -> None: with open(filename) as f: lines = f.readlines() @@ -313,13 +356,19 @@ def parse_arguments(): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " if enable_mkldnn: + build_ArmComputeLibrary() print("build pytorch with mkldnn+acl backend") - build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " - build_vars += "ACL_ROOT_DIR=/acl " + build_vars += ( + "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " + "ACL_ROOT_DIR=/acl " + "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " + "ACL_INCLUDE_DIR=/acl/build " + "ACL_LIBRARY=/acl/build " + ) if enable_cuda: build_vars += "BLAS=NVPL " else: - build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/opt/OpenBLAS " + build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS " else: print("build pytorch without mkldnn backend") diff --git a/.ci/aarch64_linux/build_aarch64_wheel.py b/.ci/aarch64_linux/build_aarch64_wheel.py index dca0427a45d77..7a4715d330060 100755 --- a/.ci/aarch64_linux/build_aarch64_wheel.py +++ b/.ci/aarch64_linux/build_aarch64_wheel.py @@ -299,6 +299,40 @@ def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: ) +def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: + print("Building OpenBLAS") + host.run_cmd( + f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.28 {git_clone_flags}" + ) + make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" + host.run_cmd( + f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS" + ) + + +def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: + print("Building Arm Compute Library") + acl_build_flags = " ".join( + [ + "debug=0", + "neon=1", + "opencl=0", + "os=linux", + "openmp=1", + "cppthreads=0", + "arch=armv8a", + "multi_isa=1", + "fixed_format_kernels=1", + "build=native", + ] + ) + host.run_cmd( + f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}" + ) + + host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") + + def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None: host.run_cmd("pip3 install auditwheel") host.run_cmd( @@ -666,6 +700,7 @@ def start_build( configure_system( host, compiler=compiler, use_conda=use_conda, python_version=python_version ) + build_OpenBLAS(host, git_clone_flags) if host.using_docker(): print("Move libgfortant.a into a standard location") @@ -688,8 +723,6 @@ def start_build( f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}" ) - host.run_cmd("pytorch/.ci/docker/common/install_openblas.sh") - print("Building PyTorch wheel") build_opts = "" if pytorch_build_number is not None: @@ -710,18 +743,15 @@ def start_build( if host.using_docker(): build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" if enable_mkldnn: - host.run_cmd("pytorch/.ci/docker/common/install_acl.sh") + build_ArmComputeLibrary(host, git_clone_flags) 
print("build pytorch with mkldnn+acl backend") build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" - build_vars += " BLAS=OpenBLAS" - build_vars += " OpenBLAS_HOME=/opt/OpenBLAS" - build_vars += " ACL_ROOT_DIR=/acl" host.run_cmd( - f"cd $HOME/pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}" + f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}" ) print("Repair the wheel") pytorch_wheel_name = host.list_dir("pytorch/dist")[0] - ld_library_path = "/acl/build:$HOME/pytorch/build/lib" + ld_library_path = "$HOME/acl/build:$HOME/pytorch/build/lib" host.run_cmd( f"export LD_LIBRARY_PATH={ld_library_path} && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}" ) @@ -877,7 +907,7 @@ def terminate_instances(instance_type: str) -> None: def parse_arguments(): from argparse import ArgumentParser - parser = ArgumentParser("Build and test AARCH64 wheels using EC2") + parser = ArgumentParser("Builid and test AARCH64 wheels using EC2") parser.add_argument("--key-name", type=str) parser.add_argument("--debug", action="store_true") parser.add_argument("--build-only", action="store_true") diff --git a/.ci/docker/common/install_acl.sh b/.ci/docker/common/install_acl.sh old mode 100755 new mode 100644 index 0b865e5bc6f8d..bf41a03b28063 --- a/.ci/docker/common/install_acl.sh +++ b/.ci/docker/common/install_acl.sh @@ -1,27 +1,16 @@ -#!/bin/bash -# Script used only in CD pipeline +set -euo pipefail -set -eux - -ACL_VERSION=${ACL_VERSION:-"v25.02"} -ACL_INSTALL_DIR="/acl" +readonly version=v25.02 +readonly src_host=https://github.com/ARM-software +readonly src_repo=ComputeLibrary # Clone ACL -git clone https://github.com/ARM-software/ComputeLibrary.git -b "${ACL_VERSION}" --depth 1 --shallow-submodules +[[ ! 
-d ${src_repo} ]] && git clone ${src_host}/${src_repo}.git +cd ${src_repo} + +git checkout $version -ACL_CHECKOUT_DIR="ComputeLibrary" # Build with scons -pushd $ACL_CHECKOUT_DIR scons -j8 Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0 \ os=linux arch=armv8a build=native multi_isa=1 \ fixed_format_kernels=1 openmp=1 cppthreads=0 -popd - -# Install ACL -sudo mkdir -p ${ACL_INSTALL_DIR} -for d in arm_compute include utils support src build -do - sudo cp -r ${ACL_CHECKOUT_DIR}/${d} ${ACL_INSTALL_DIR}/${d} -done - -rm -rf $ACL_CHECKOUT_DIR \ No newline at end of file diff --git a/.ci/docker/common/install_openblas.sh b/.ci/docker/common/install_openblas.sh old mode 100755 new mode 100644 index 2f386c6bd523a..3c795acf2220b --- a/.ci/docker/common/install_openblas.sh +++ b/.ci/docker/common/install_openblas.sh @@ -3,10 +3,8 @@ set -ex -OPENBLAS_VERSION=${OPENBLAS_VERSION:-"v0.3.30"} - -# Clone OpenBLAS -git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION}" --depth 1 --shallow-submodules +cd / +git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION:-v0.3.30}" --depth 1 --shallow-submodules OPENBLAS_CHECKOUT_DIR="OpenBLAS" OPENBLAS_BUILD_FLAGS=" @@ -19,7 +17,5 @@ CFLAGS=-O3 BUILD_BFLOAT16=1 " -make -j8 ${OPENBLAS_BUILD_FLAGS} -C $OPENBLAS_CHECKOUT_DIR -sudo make install -C $OPENBLAS_CHECKOUT_DIR - -rm -rf $OPENBLAS_CHECKOUT_DIR \ No newline at end of file +make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR} +make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR} diff --git a/.ci/docker/manywheel/Dockerfile_2_28_aarch64 b/.ci/docker/manywheel/Dockerfile_2_28_aarch64 index 5ff4d98e51aa6..da7ab4d3fd154 100644 --- a/.ci/docker/manywheel/Dockerfile_2_28_aarch64 +++ b/.ci/docker/manywheel/Dockerfile_2_28_aarch64 @@ -62,13 +62,6 @@ ARG OPENBLAS_VERSION ADD ./common/install_openblas.sh install_openblas.sh RUN bash ./install_openblas.sh && rm install_openblas.sh -# Install Arm Compute Library -FROM base as arm_compute -# use python3.9 to install scons -RUN python3.9 -m pip install scons==4.7.0 -RUN ln -sf /opt/python/cp39-cp39/bin/scons /usr/local/bin -COPY ./common/install_acl.sh install_acl.sh -RUN bash ./install_acl.sh && rm install_acl.sh FROM base as final # remove unnecessary python versions @@ -77,5 +70,4 @@ RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/ -COPY --from=arm_compute /acl /acl -ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:/acl/build/:$LD_LIBRARY_PATH \ No newline at end of file +ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH diff --git a/.ci/docker/manywheel/build.sh b/.ci/docker/manywheel/build.sh index ea4be0128eb00..5dee4325857fb 100755 --- a/.ci/docker/manywheel/build.sh +++ b/.ci/docker/manywheel/build.sh @@ -28,7 +28,6 @@ fi MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-} DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-} OPENBLAS_VERSION=${OPENBLAS_VERSION:-} -ACL_VERSION=${ACL_VERSION:-} case ${image} in manylinux2_28-builder:cpu) @@ -42,6 +41,7 @@ case ${image} in GPU_IMAGE=arm64v8/almalinux:8 DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13 --build-arg NINJA_VERSION=1.12.1" MANY_LINUX_VERSION="2_28_aarch64" + OPENBLAS_VERSION="v0.3.30" ;; manylinuxcxx11-abi-builder:cpu-cxx11-abi) TARGET=final @@ -121,8 +121,7 @@ tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]') DOCKER_BUILDKIT=1 docker build \ 
${DOCKER_GPU_BUILD_ARG} \ --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ - --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION:-}" \ - --build-arg "ACL_VERSION=${ACL_VERSION:-}" \ + --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION}" \ --target "${TARGET}" \ -t "${tmp_tag}" \ $@ \ diff --git a/.ci/pytorch/build.sh b/.ci/pytorch/build.sh index b3601b17b7af0..1c88554c2af96 100755 --- a/.ci/pytorch/build.sh +++ b/.ci/pytorch/build.sh @@ -89,7 +89,7 @@ fi if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then export USE_MKLDNN=1 export USE_MKLDNN_ACL=1 - export ACL_ROOT_DIR=/acl + export ACL_ROOT_DIR=/ComputeLibrary fi if [[ "$BUILD_ENVIRONMENT" == *riscv64* ]]; then From 94db2ad51d44f203cdb9df124712b3f0a2837f43 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 11 Sep 2025 15:29:29 +0000 Subject: [PATCH 102/693] Revert "Move prioritized text linker optimization code from setup.py to cmake (#160078)" This reverts commit 26b3ae58908becbb03b28636f7384d2972a8c9a5. Reverted https://github.com/pytorch/pytorch/pull/160078 on behalf of https://github.com/atalman due to Sorry reverting this broke linux aarch64 CUDA nightlies [pytorch/pytorch/actions/runs/17637486681/job/50146967503](https://github.com/pytorch/pytorch/actions/runs/17637486681/job/50146967503) ([comment](https://github.com/pytorch/pytorch/pull/160078#issuecomment-3281426631)) --- .ci/aarch64_linux/aarch64_ci_build.sh | 6 +- .ci/aarch64_linux/aarch64_wheel_ci_build.py | 2 +- .gitignore | 3 - CMakeLists.txt | 66 ------------------- cmake/Summary.cmake | 1 - cmake/public/utils.cmake | 20 ------ setup.py | 24 +++++++ tools/setup_helpers/generate_linker_script.py | 23 ------- 8 files changed, 29 insertions(+), 116 deletions(-) diff --git a/.ci/aarch64_linux/aarch64_ci_build.sh b/.ci/aarch64_linux/aarch64_ci_build.sh index a0eb0b72df2b3..178db42a609ab 100644 --- a/.ci/aarch64_linux/aarch64_ci_build.sh +++ b/.ci/aarch64_linux/aarch64_ci_build.sh @@ -31,7 +31,8 @@ pip install -r /pytorch/requirements.txt pip install auditwheel==6.2.0 wheel if [ "$DESIRED_CUDA" = "cpu" ]; then echo "BASE_CUDA_VERSION is not set. Building cpu wheel." 
- python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn + #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files + USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn else echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA" export USE_SYSTEM_NCCL=1 @@ -45,5 +46,6 @@ else export USE_NVIDIA_PYPI_LIBS=1 fi - python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda + #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files + USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda fi diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py index d4afea81ac0b4..1b6429fa8c06e 100755 --- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py +++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py @@ -317,7 +317,7 @@ def parse_arguments(): ).decode() print("Building PyTorch wheel") - build_vars = "" + build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " # MAX_JOB=5 is not required for CPU backend (see commit 465d98b) if enable_cuda: build_vars += "MAX_JOBS=5 " diff --git a/.gitignore b/.gitignore index 774ce14f17d63..d1fa4cd3caf28 100644 --- a/.gitignore +++ b/.gitignore @@ -259,9 +259,6 @@ gen .pytest_cache aten/build/* -# Linker scripts for prioritized text optimization -cmake/linker_script.ld - # Bram plsdontbreak diff --git a/CMakeLists.txt b/CMakeLists.txt index d367b078604e7..5a43e0da8f2ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -379,13 +379,6 @@ cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler" OFF "USE_CUDA" OFF) cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON "CPU_AARCH64" OFF) -# prioritized text linker, ON by default for AArch64+Linux, option visible to all AArch64, x86 and ppc64le. -set(USE_PRIORITIZED_TEXT_DEFAULT OFF) -if(LINUX AND CPU_AARCH64) - set(USE_PRIORITIZED_TEXT_DEFAULT ON) -endif() -cmake_dependent_option(USE_PRIORITIZED_TEXT_FOR_LD "Use prioritized text linker for ld." - "${USE_PRIORITIZED_TEXT_DEFAULT}" "CPU_INTEL OR CPU_AARCH64 OR CPU_POWER" OFF) option(USE_MIMALLOC "Use mimalloc" OFF) # Enable third party mimalloc library to improve memory allocation performance @@ -664,11 +657,6 @@ endif(MSVC) string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all") -# Set linker max-page-size to 64KiB on AArch64 Linux -if(LINUX AND CPU_AARCH64) - add_link_options_if_supported("-z,max-page-size=0x10000") -endif() - # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not # applicable to mobile are disabled by this variable. 
Setting # `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it @@ -1433,57 +1421,3 @@ if(BUILD_BUNDLE_PTXAS AND USE_CUDA) install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas" DESTINATION "${CMAKE_INSTALL_BINDIR}") endif() - -if(USE_PRIORITIZED_TEXT_FOR_LD) - add_compile_options( - $<$:-ffunction-sections> - $<$:-fdata-sections> - ) - set(LINKER_SCRIPT_FILE_OUT "${CMAKE_SOURCE_DIR}/cmake/linker_script.ld") - set(LINKER_SCRIPT_FILE_IN "${CMAKE_SOURCE_DIR}/cmake/prioritized_text.txt") - - add_custom_command( - OUTPUT "${LINKER_SCRIPT_FILE_OUT}" - COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py --filein "${LINKER_SCRIPT_FILE_IN}" --fout "${LINKER_SCRIPT_FILE_OUT}" - DEPENDS ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py "${LINKER_SCRIPT_FILE_IN}" - COMMENT "Generating prioritized text linker files" - VERBATIM - ) - - add_custom_target(generate_linker_script DEPENDS "${LINKER_SCRIPT_FILE_OUT}") - - if(BUILD_PYTHON) - set(LINKER_OPT_TARGETS torch_python) - endif() - - if(NOT BUILD_LIBTORCHLESS) - list(APPEND LINKER_OPT_TARGETS torch_cpu c10) - if(USE_CUDA) - list(APPEND LINKER_OPT_TARGETS torch_cuda c10_cuda) - endif() - if(USE_XPU) - list(APPEND LINKER_OPT_TARGETS torch_xpu c10_xpu) - endif() - if(USE_ROCM) - list(APPEND LINKER_OPT_TARGETS torch_hip c10_hip) - endif() - endif() - - foreach(tgt IN LISTS LINKER_OPT_TARGETS) - if(TARGET ${tgt}) - add_dependencies("${tgt}" generate_linker_script) - target_link_options_if_supported(${tgt} "-T,${LINKER_SCRIPT_FILE_OUT}") - set_property(TARGET ${tgt} APPEND PROPERTY LINK_DEPENDS "${LINKER_SCRIPT_FILE_OUT}") - else() - message(WARNING "Requested target '${tgt}' for linker script optimization was not found.") - endif() - endforeach() - -else() - if(LINUX AND CPU_AARCH64) - message(WARNING [[ - It is strongly recommend to enable linker script optimization for all AArch64 Linux builds. 
- To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1 - ]]) - endif() -endif() \ No newline at end of file diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 2e2fd370a994f..ffd4b5298a890 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -158,7 +158,6 @@ function(caffe2_print_configuration_summary) if(${USE_KLEIDIAI}) message(STATUS " USE_KLEIDIAI : ${USE_KLEIDIAI}") endif() - message(STATUS " USE_PRIORITIZED_TEXT_FOR_LD : ${USE_PRIORITIZED_TEXT_FOR_LD}") message(STATUS " USE_UCC : ${USE_UCC}") if(${USE_UCC}) message(STATUS " USE_SYSTEM_UCC : ${USE_SYSTEM_UCC}") diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake index c96ffebf858e3..68e66bb3fc386 100644 --- a/cmake/public/utils.cmake +++ b/cmake/public/utils.cmake @@ -482,7 +482,6 @@ function(torch_update_find_cuda_flags) endfunction() include(CheckCXXCompilerFlag) -include(CheckLinkerFlag) ############################################################################## # CHeck if given flag is supported and append it to provided outputvar @@ -512,22 +511,3 @@ function(target_compile_options_if_supported target flag) target_compile_options(${target} PRIVATE ${flag}) endif() endfunction() - -# Check if a global link option is supported -function(add_link_options_if_supported flag) - check_linker_flag(C "LINKER:${flag}" _supported) - if("${_supported}") - add_link_options("LINKER:${flag}") - else() - message(WARNING "Attempted to use unsupported link option : ${flag}.") - endif() -endfunction() - -function(target_link_options_if_supported tgt flag) - check_linker_flag(C "LINKER:${flag}" _supported) - if("${_supported}") - target_link_options("${tgt}" PRIVATE "LINKER:${flag}") - else() - message(WARNING "Attempted to use unsupported link option : ${flag}.") - endif() -endfunction() \ No newline at end of file diff --git a/setup.py b/setup.py index 2bb63a93cec83..c0523a1b5c601 100644 --- a/setup.py +++ b/setup.py @@ -227,6 +227,9 @@ # Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free. # By default, It is only enabled on Windows. # +# USE_PRIORITIZED_TEXT_FOR_LD +# Uses prioritized text form cmake/prioritized_text.txt for LD +# # BUILD_LIBTORCH_WHL # Builds libtorch.so and its dependencies as a wheel # @@ -320,6 +323,7 @@ IS_LINUX, IS_WINDOWS, ) +from tools.setup_helpers.generate_linker_script import gen_linker_script def str2bool(value: str | None) -> bool: @@ -1623,6 +1627,26 @@ def main() -> None: if BUILD_PYTHON_ONLY: install_requires += [f"{LIBTORCH_PKG_NAME}=={TORCH_VERSION}"] + if str2bool(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD")): + gen_linker_script( + filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld" + ) + linker_script_path = os.path.abspath("cmake/linker_script.ld") + os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}" + os.environ["CFLAGS"] = ( + os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections" + ) + os.environ["CXXFLAGS"] = ( + os.getenv("CXXFLAGS", "") + " -ffunction-sections -fdata-sections" + ) + elif platform.system() == "Linux" and platform.processor() == "aarch64": + print_box( + """ + WARNING: we strongly recommend enabling linker script optimization for ARM + CUDA. + To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1 + """ + ) + # Parse the command line and check the arguments before we proceed with # building deps and setup. We need to set values so `--help` works. 
dist = Distribution() diff --git a/tools/setup_helpers/generate_linker_script.py b/tools/setup_helpers/generate_linker_script.py index b5a7a4ce7dec9..e66fc197062ad 100644 --- a/tools/setup_helpers/generate_linker_script.py +++ b/tools/setup_helpers/generate_linker_script.py @@ -1,7 +1,5 @@ -import argparse import os import subprocess -from pathlib import Path def gen_linker_script( @@ -30,10 +28,6 @@ def gen_linker_script( assert len(text_line_start) == 1, "The linker script has multiple text sections!" text_line_start = text_line_start[0] - # ensure that parent directory exists before writing - fout = Path(fout) - fout.parent.mkdir(parents=True, exist_ok=True) - with open(fout, "w") as f: for lineid, line in enumerate(linker_script_lines): if lineid == text_line_start + 2: @@ -42,20 +36,3 @@ def gen_linker_script( f.write(f" .text.{plines}\n") f.write(" )\n") f.write(f"{line}\n") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate linker file based on prioritized symbols. Used for link-time optimization.", - ) - parser.add_argument( - "--filein", - help="Path to prioritized_text.txt input file", - default=argparse.SUPPRESS, - ) - parser.add_argument( - "--fout", help="Output path for linker ld file", default=argparse.SUPPRESS - ) - # convert args to a dict to pass to gen_linker_script - kwargs = vars(parser.parse_args()) - gen_linker_script(**kwargs) From 3f6d88f04c6d24fa582b04c7bdc6181b191f356d Mon Sep 17 00:00:00 2001 From: Avik Chaudhuri Date: Thu, 11 Sep 2025 15:34:03 +0000 Subject: [PATCH 103/693] paths to exclude shape guards (#162684) Summary: Easier to land than https://www.internalfb.com/diff/D82030581 Test Plan: everything blamed by https://www.internalfb.com/diff/D80713603 (except some old exir tests) Rollback Plan: Differential Revision: D82180349 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162684 Approved by: https://github.com/tugsbayasgalan --- torch/export/_unlift.py | 31 ++++++++++++++++++--------- torch/fx/experimental/proxy_tensor.py | 2 +- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/torch/export/_unlift.py b/torch/export/_unlift.py index af746c4e3b7dd..3b6f2f0266f69 100644 --- a/torch/export/_unlift.py +++ b/torch/export/_unlift.py @@ -640,20 +640,31 @@ def handle_symint(expr, src): return new_guards_code -def _unlift_exported_program_lifted_states( - ep: ExportedProgram, check_guards=True -) -> torch.fx.GraphModule: - # force check_guards=False for executorch because - # its pass infra has too many calls to .module() - # and but does not like call modules in the graph - # TODO: update executorch to check_guards=False +def _ok_to_generate_guards_fn(): + patterns = [ + "executorch", + "modai", + "on_device_ai", + "torchao", + ] + # force check_guards=False for files matching `patterns` + # because they have too many calls to .module() and + # do not like any call modules in the graph + # TODO: fix these files to handle guard fns frame = inspect.currentframe() while frame is not None: - if "executorch" in frame.f_code.co_filename: - check_guards = False - break + if any(path in frame.f_code.co_filename for path in patterns): + return False frame = frame.f_back + return True + + +def _unlift_exported_program_lifted_states( + ep: ExportedProgram, check_guards=True +) -> torch.fx.GraphModule: + check_guards = check_guards and _ok_to_generate_guards_fn() + # TODO T206340015 if ep.verifiers[0].dialect != "TRAINING": ep = _remove_effect_tokens(ep) diff --git a/torch/fx/experimental/proxy_tensor.py 
b/torch/fx/experimental/proxy_tensor.py index ae4d1c59823a2..d9f687f4d24e1 100644 --- a/torch/fx/experimental/proxy_tensor.py +++ b/torch/fx/experimental/proxy_tensor.py @@ -2020,7 +2020,7 @@ def create_node(self, *args: object, **kwargs: object) -> fx.node.Node: # nn_module_stack if node.op not in ["placeholder", "output"]: - if "nn_module_stack" not in node.meta: + if node.meta.get("nn_module_stack") is None: node.meta["nn_module_stack"] = self.module_stack.copy() # convert nn_module_stack from Dict[key, (FQN, class)] -> Dict[str, Tuple[str, str]] for key, (fqn, mod_cls) in node.meta["nn_module_stack"].items(): From 24492cbab2f0e842c4cf03993517ff2338c22521 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 2 Sep 2025 14:06:36 -0700 Subject: [PATCH 104/693] [BE] Cleanup stale comments/copy from `gemm` (#162001) Followup after https://github.com/pytorch/pytorch/pull/154012 Since the introduction of `gemm_no_downcast_stub` it's no longer necessary to allocate temporary array and then manually implement the `beta` logic in the codebase Pull Request resolved: https://github.com/pytorch/pytorch/pull/162001 Approved by: https://github.com/drisspg ghstack dependencies: #161999 --- aten/src/ATen/native/CPUBlas.cpp | 34 ++------------------------------ 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/aten/src/ATen/native/CPUBlas.cpp b/aten/src/ATen/native/CPUBlas.cpp index e06afddd05aa7..20be0d6fe017a 100644 --- a/aten/src/ATen/native/CPUBlas.cpp +++ b/aten/src/ATen/native/CPUBlas.cpp @@ -457,24 +457,9 @@ void gemm( return; } #endif - // for the fallback path, first compute gemm with beta = 0, - // and then add c in full precision. - int64_t c_size = n * m; - std::vector float_c(c_size, 0.f); gemm_no_downcast_stub( at::kCPU, at::kBFloat16, - transa, transb, m, n, k, alpha, a, lda, b, ldb, 0.f, float_c.data(), m); - for (const auto j : c10::irange(n)) { - for (const auto i : c10::irange(m)) { - auto offset = j * ldc + i; - // beta == 0 won't propagate NaN from C - if (beta == 0.f) { - c[offset] = float_c[j * m + i]; - } else { - c[offset] = beta * c[offset] + float_c[j * m + i]; - } - } - } + transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm( @@ -493,24 +478,9 @@ void gemm( return; } #endif - // for the fallback path, first compute gemm with beta = 0, - // and then add c in full precision. 
- int64_t c_size = n * m; - std::vector float_c(c_size, 0.f); gemm_no_downcast_stub( at::kCPU, at::kHalf, - transa, transb, m, n, k, alpha, a, lda, b, ldb, 0.f, float_c.data(), m); - for (const auto j : c10::irange(n)) { - for (const auto i : c10::irange(m)) { - auto offset = j * ldc + i; - // beta == 0 won't propagate NaN from C - if (beta == 0.f) { - c[offset] = float_c[j * m + i]; - } else { - c[offset] = beta * c[offset] + float_c[j * m + i]; - } - } - } + transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm( From 2f5a24c2a2d66156ffd0ae8785a8ebc589ddf6f6 Mon Sep 17 00:00:00 2001 From: atalman Date: Thu, 11 Sep 2025 16:09:20 +0000 Subject: [PATCH 105/693] Smoke tests don't run nvshmem on Windows (#162646) Only available for linux x86 and aarch64 : https://pypi.org/project/nvidia-nvshmem-cu13/#files nvshmem is available only on linux: `` "nvidia-nvshmem-cu12==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | " `` https://github.com/pytorch/pytorch/blob/main/.github/scripts/generate_binary_build_matrix.py#L57 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162646 Approved by: https://github.com/kwen2501 --- .ci/pytorch/smoke_test/smoke_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.ci/pytorch/smoke_test/smoke_test.py b/.ci/pytorch/smoke_test/smoke_test.py index 305ad15d98e7e..675d58a3e283d 100644 --- a/.ci/pytorch/smoke_test/smoke_test.py +++ b/.ci/pytorch/smoke_test/smoke_test.py @@ -386,8 +386,8 @@ def foo(x: torch.Tensor) -> torch.Tensor: def smoke_test_nvshmem() -> None: - if not torch.cuda.is_available(): - print("CUDA is not available, skipping NVSHMEM test") + if not torch.cuda.is_available() or target_os == "windows": + print("Windows platform or CUDA is not available, skipping NVSHMEM test") return # Check if NVSHMEM is compiled in current build @@ -396,7 +396,9 @@ def smoke_test_nvshmem() -> None: except ImportError: # Not built with NVSHMEM support. # torch is not compiled with NVSHMEM prior to 2.9 - if torch.__version__ < "2.9": + from torch.torch_version import TorchVersion + + if TorchVersion(torch.__version__) < (2, 9): return else: # After 2.9: NVSHMEM is expected to be compiled in current build From fe8cc619b8c593a02bcbcfff939db3b650286bc6 Mon Sep 17 00:00:00 2001 From: suo Date: Thu, 11 Sep 2025 16:29:32 +0000 Subject: [PATCH 106/693] [torch][c10d] fix split_group in mixed backend case (#162424) Today we can initialize a mixed-backend process group (e.g. "cpu:gloo,cuda:nccl") but we can only pass one set of process group options. However, when we call `split_group`, we retrieve that set of options from the parent PG and pass it to the ProcessGroup::groupSplit C++ API, which then attempts to propagate that set of options to all backends. This leads to an assert on some user code, where ProcessGroupGloo::split is expecting gloo options but receives nccl options instead. Arguably the APIs as currently designed are just broken; we should not ever expect a single set of backend options to apply across multiple backends. However, fixing this would require changing quite a few public APIs. As a quick fix, since user-provided options really only exist for NCCL, just warn and fall-back to defaulted options for Gloo if non-gloo options are detected. 
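For illustration, a minimal sketch of the scenario this addresses (assuming a 2-GPU, single-node job launched with torchrun; the option objects and ranks here are illustrative and not taken from this PR):

```python
import os

import torch
import torch.distributed as dist


def main() -> None:
    rank = int(os.environ["RANK"])
    world_size = int(os.environ["WORLD_SIZE"])
    device = torch.device(f"cuda:{rank}")

    # Mixed-backend group: gloo serves CPU tensors, nccl serves CUDA tensors.
    # The single pg_options object below is NCCL-specific; split_group used to
    # forward it to ProcessGroupGloo::split as well, tripping its options assert.
    dist.init_process_group(
        "cpu:gloo,cuda:nccl",
        rank=rank,
        world_size=world_size,
        pg_options=dist.ProcessGroupNCCL.Options(),
        device_id=device,
    )

    # With this change, gloo warns and falls back to default options instead.
    subgroup = dist.split_group(split_ranks=[[0, 1]])
    if subgroup is not None:
        dist.broadcast(torch.ones(1, device=device), src=0, group=subgroup)

    dist.destroy_process_group()


if __name__ == "__main__":
    main()
```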
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162424 Approved by: https://github.com/d4l3k, https://github.com/fduwjj, https://github.com/H-Huang --- test/distributed/test_c10d_nccl.py | 56 +++++++++++++++++++ torch/csrc/distributed/c10d/ProcessGroup.hpp | 4 +- .../distributed/c10d/ProcessGroupGloo.cpp | 40 ++++++++++++- .../distributed/c10d/ProcessGroupGloo.hpp | 3 + torch/csrc/distributed/c10d/init.cpp | 30 ++-------- torch/distributed/distributed_c10d.py | 6 +- 6 files changed, 109 insertions(+), 30 deletions(-) diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index b234c907a6658..0d55845228da7 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -1087,6 +1087,62 @@ def test_comm_split_group(self): dist.destroy_process_group() + @requires_nccl_version((2, 18), "Need NCCL 2.18+ for ncclCommSplit") + @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") + def test_comm_split_group_mixed_backend(self): + # Test `ncclCommSplit` for smaller subgroups of the world when + # we've passed a specific device_id to init_process_group. + store = c10d.FileStore(self.file_name, self.world_size) + device = torch.device(f"cuda:{self.rank}") + # pg = self._create_process_group_nccl(store, self.opts(), device_id=device) + # create nccl processgroup with opts + c10d.init_process_group( + "cpu:gloo,cuda:nccl", + world_size=self.world_size, + rank=self.rank, + store=store, + pg_options=self.opts(), + device_id=device, + ) + pg = c10d.distributed_c10d._get_default_group() + backend = pg._get_backend(torch.device(device)) + + cuda_tensor = torch.full((1,), self.rank).cuda(device) + cpu_tensor = torch.full((1,), self.rank) + # Create subgroup between ranks 0, 1 + subg_ranks = [0, 1] + ng1 = c10d.split_group(pg, [subg_ranks]) + backend1 = ng1._get_backend(torch.device(device)) + + # check basic options are the same between parent and child + self.assertEqual(backend.options._timeout, backend1.options._timeout) + self.assertEqual( + backend.options.is_high_priority_stream, + backend1.options.is_high_priority_stream, + ) + self.assertEqual(ng1.group_desc, "default_pg:split:0") + + # comm split happens eagerly since device_id is passed to init_process_group. + self.assertEqual(backend.comm_split_count(), 1) + # dist.get_process_group_ranks returns the global ranks in the subgroup. 
+ self.assertEqual( + dist.get_process_group_ranks(ng1), + subg_ranks if self.rank in subg_ranks else [], + ) + + # is part of ng1; otherwise, -1 + if dist.get_rank(ng1) >= 0: + dist.broadcast(cuda_tensor, dist.get_global_rank(ng1, 0), group=ng1) + self.assertEqual(cuda_tensor, torch.full((1,), 0)) + dist.broadcast(cpu_tensor, dist.get_global_rank(ng1, 0), group=ng1) + self.assertEqual(cpu_tensor, torch.full((1,), 0)) + + ng2 = c10d.split_group(pg, [subg_ranks]) + self.assertEqual(ng2.group_desc, "default_pg:split:1") + self.assertEqual(backend.comm_split_count(), 2) + + dist.destroy_process_group() + @requires_nccl_version((2, 18), "Need NCCL 2.18+ for ncclCommSplit") @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_non_blocking_init(self): diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp index 4fb2d566e9a76..5a06a386d5ca8 100644 --- a/torch/csrc/distributed/c10d/ProcessGroup.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp @@ -1015,7 +1015,9 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder { // Backend classes for this ProcessGroup std::unordered_set deviceTypes_; - std::unordered_map deviceTypeToBackendType_; + // This mapping is ordered, as splitGroup must call split on the underlying + // backends in a consistent order. + std::map deviceTypeToBackendType_; std::unordered_map> deviceTypeToBackend_; std::unordered_map> diff --git a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp index fbd8a403b97dc..74063ff579e80 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp +++ b/torch/csrc/distributed/c10d/ProcessGroupGloo.cpp @@ -551,6 +551,32 @@ std::shared_ptr<::gloo::transport::Device> ProcessGroupGloo:: static std::atomic process_group_id = 0; +c10::intrusive_ptr ProcessGroupGloo::Options:: + create_default(std::chrono::milliseconds timeout) { + auto options = ::c10d::ProcessGroupGloo::Options::create(); + bool lazyInit = ::c10d::getDefaultGlooLazyInit(); + + // Use interfaces listed in "GLOO_SOCKET_IFNAME", if set. + auto ifnameEnv = c10::utils::get_env("GLOO_SOCKET_IFNAME"); + if (ifnameEnv && ifnameEnv->size() > 1) { + for (const auto& iface : ::c10d::split(',', ifnameEnv->c_str())) { + options->devices.push_back( + ::c10d::ProcessGroupGloo::createDeviceForInterface(iface, lazyInit)); + } + } else { + // If no hostname is specified, this function looks up + // the machine's hostname and returns a device instance + // associated with the address that the hostname resolves to. + options->devices.push_back( + ::c10d::ProcessGroupGloo::createDefaultDevice(lazyInit)); + } + + options->timeout = timeout; + // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) + options->threads = options->devices.size() * 2; + return options; +} + ProcessGroupGloo::ProcessGroupGloo( const c10::intrusive_ptr& store, int rank, @@ -710,7 +736,12 @@ c10::intrusive_ptr ProcessGroupGloo::split( } auto glooOpts = c10::dynamic_intrusive_pointer_cast(opts); - TORCH_CHECK(glooOpts != nullptr, "opts not a ProcessGroupGloo::Options."); + if (glooOpts == nullptr) { + TORCH_WARN_ONCE( + "Tried to pass options to ProcessGroupGloo::split that are not ProcessGroupGloo::Options." + "Falling back to default options."); + glooOpts = ProcessGroupGloo::Options::create_default(); + } // TODO: we need to get rid of globalRanksInGroup eventually. 
std::vector globalRanksInGroup; @@ -729,7 +760,12 @@ c10::intrusive_ptr ProcessGroupGloo::merge( const int& rank, const int& size) { auto glooOpts = c10::dynamic_intrusive_pointer_cast(opts); - TORCH_CHECK(glooOpts != nullptr, "opts not a ProcessGroupGloo::Options."); + if (glooOpts == nullptr) { + TORCH_WARN_ONCE( + "Tried to pass options to ProcessGroupGloo::merge that are not ProcessGroupGloo::Options." + "Falling back to default options."); + glooOpts = ProcessGroupGloo::Options::create_default(); + } auto pg = c10::make_intrusive( store->clone(), rank, size, glooOpts); return c10::static_intrusive_pointer_cast(pg); diff --git a/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp b/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp index 4297807f2e8b9..b2cc6993528bf 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp @@ -255,6 +255,9 @@ class TORCH_API ProcessGroupGloo : public Backend { return c10::make_intrusive(timeout); } + static c10::intrusive_ptr create_default( + std::chrono::milliseconds timeout = kBackendDefaultTimeout); + std::vector> devices; int threads; }; diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index a5270354cf61d..7e79fef8392f6 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -3106,8 +3106,6 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`). .def_readwrite("group_name", &::c10d::Backend::Options::group_name); #ifdef USE_C10D_GLOO - static const std::string GLOO_SOCKET_IFNAME_ENV = "GLOO_SOCKET_IFNAME"; - auto processGroupGloo = intrusive_ptr_no_gil_destructor_class_<::c10d::ProcessGroupGloo>( module, "ProcessGroupGloo", backend); @@ -3184,31 +3182,11 @@ options :class:`~torch.distributed.ProcessGroupNCCL.Options`). // https://github.com/pybind/pybind11/issues/5473 py::gil_scoped_release nogil{}; - auto options = ::c10d::ProcessGroupGloo::Options::create(); - bool lazyInit = ::c10d::getDefaultGlooLazyInit(); - - // Use interfaces listed in "GLOO_SOCKET_IFNAME", if set. - auto ifnameEnv = - c10::utils::get_env(GLOO_SOCKET_IFNAME_ENV.c_str()); - if (ifnameEnv && ifnameEnv->size() > 1) { - for (const auto& iface : ::c10d::split(',', ifnameEnv->c_str())) { - options->devices.push_back( - ::c10d::ProcessGroupGloo::createDeviceForInterface( - iface, lazyInit)); - } - } else { - // If no hostname is specified, this function looks up - // the machine's hostname and returns a device instance - // associated with the address that the hostname resolves to. - options->devices.push_back( - ::c10d::ProcessGroupGloo::createDefaultDevice(lazyInit)); - } - - options->timeout = timeout; - // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) - options->threads = options->devices.size() * 2; return c10::make_intrusive<::c10d::ProcessGroupGloo>( - store, rank, size, options); + store, + rank, + size, + ::c10d::ProcessGroupGloo::Options::create_default(timeout)); }), py::arg("store"), py::arg("rank"), diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 2f60cbe13abcf..498cc50eb9cf5 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -5160,7 +5160,11 @@ def split_group( my_group = split_group break - group_name = _process_group_name(my_group, use_hashed_name=False) + # use_hashed_name is True to ensure that subgroups have unique names. + # This is needed as some backends (e.g. 
Gloo) use the group name as a + # PrefixStore prefix for initialization of splits. Thus, names have to be + # unique to avoid key collisions. + group_name = _process_group_name(my_group, use_hashed_name=True) split_pg = parent_pg.split_group( my_group, timeout=timeout, From 8be8b94793792dc3830c13c6bdee325faefc4bd7 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 11 Sep 2025 16:30:26 +0000 Subject: [PATCH 107/693] Update SECURITY.md with reporting guidelines (#162608) Added clarification that all reports will be disclosed within 90 days Pull Request resolved: https://github.com/pytorch/pytorch/pull/162608 Approved by: https://github.com/seemethere, https://github.com/albanD --- SECURITY.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SECURITY.md b/SECURITY.md index 3baa145df7953..16d72ef1ea08e 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -16,6 +16,8 @@ However, if you believe you have found a security vulnerability in PyTorch, we e Please report security issues using https://github.com/pytorch/pytorch/security/advisories/new +All reports submitted thru the security advisories mechanism would **either be made public or dismissed by the team within 90 days of the submission**. If advisory has been closed on the grounds that it is not a security issue, please do not hesitate to create an [new issue](https://github.com/pytorch/pytorch/issues/new?template=bug-report.yml) as it is still likely a valid issue within the framework. + Please refer to the following page for our responsible disclosure policy, reward guidelines, and those things that should not be reported: https://www.facebook.com/whitehat From fccddf02b6d6116114ffb76a6031cfcb77a2cc73 Mon Sep 17 00:00:00 2001 From: Avik Chaudhuri Date: Thu, 11 Sep 2025 16:35:20 +0000 Subject: [PATCH 108/693] repro 161902 (#162416) Summary: Sometimes `ShapeEnv.create_symbol` can return a `sympy.Integer`. This messes up our phantom symbol infra for derived dims. 
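As a rough illustration, a hypothetical repro sketch in the spirit of #161902 (the module, shapes, and error handling below are illustrative; the added test in this PR is the authoritative repro): two derived dims that can only be satisfied by a constant force the root symbol to specialize, and export should surface a plain constraint-violation error rather than failing inside the phantom-symbol handling.

```python
# Hypothetical sketch based on the summary above; dx resolves to a constant,
# so create_symbol can hand back a sympy.Integer for the derived-dim root.
import torch
from torch.export import Dim, export


class Add(torch.nn.Module):
    def forward(self, x, y):
        return x + y


dx = Dim("dx", min=1, max=2)
dynamic_shapes = {"x": (2 * dx, Dim.STATIC), "y": (dx + 1, Dim.STATIC)}

# Both 2*dx == 2 and dx + 1 == 2 force dx == 1, so the "dynamic" dims are
# really constants; export reports a constraint violation for them.
try:
    export(Add(), (torch.randn(2, 3), torch.randn(2, 3)), dynamic_shapes=dynamic_shapes)
except torch._dynamo.exc.UserError as e:
    print(e)
```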
Fixes #161902 Test Plan: added test based on repro Rollback Plan: Differential Revision: D81960709 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162416 Approved by: https://github.com/tugsbayasgalan --- test/export/test_export.py | 20 ++++++++++++++++++++ torch/fx/experimental/symbolic_shapes.py | 22 ++++++++++++---------- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/test/export/test_export.py b/test/export/test_export.py index 7efab79cb08f4..bdcc63034abdd 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -4755,6 +4755,26 @@ def forward(self, x, y): self.assertEqual(range_lower_bounds, [1, 2]) self.assertEqual(range_upper_bounds, [2, 3]) + def test_issue_161902(self): + class Add(torch.nn.Module): + def forward(self, x, y): + return x + y + + m = Add() + x = torch.randn(2, 3) + y = torch.randn(2, 3) + + dx = Dim("dx", min=1, max=2) + conflicting = {"x": (2 * dx, Dim.STATIC), "y": (dx + 1, Dim.STATIC)} + + with self.assertRaisesRegex( + torch._dynamo.exc.UserError, + r"Constraints violated.*" + r"\n.*You marked 2\*dx as dynamic but your code specialized it to be a constant \(2\).*" + r"\n.*You marked dx \+ 1 as dynamic but your code specialized it to be a constant \(2\).*", + ): + export(m, (x, y), dynamic_shapes=conflicting) + def test_range_constraints_with_replacement(self): class M(torch.nn.Module): def forward(self, x, y): diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index b5758fdfa24d1..5c4ed77bab919 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -1943,7 +1943,7 @@ def __post_init__(self) -> None: for source, root, fn in self.derived_equalities: # preprocess into a transitively-closed map # NOTE(avik): we reuse the union-find forest for canonicalizing input sources - if isinstance(root, sympy.Symbol): + if isinstance(root, (sympy.Symbol, sympy.Integer)): self._defs[self._find(source)] = fn(root) else: self._defs[self._find(source)] = fn(self._rewrite(root)) @@ -5427,11 +5427,12 @@ def get_expression(tensor_dim_src: Source) -> sympy.Expr: for srcEq, root, fn in equalities_inputs.derived_equalities: expr1 = get_expression(srcEq) # recall that root is either a phantom symbol or an input source - expr2, debug_name = ( - (root, self.var_to_sources[root][0].name()) - if isinstance(root, sympy.Symbol) - else (get_expression(root), self._debug_name(root)) - ) + if isinstance(root, sympy.Symbol): + expr2, debug_name = root, self.var_to_sources[root][0].name() + elif isinstance(root, sympy.Integer): + expr2, debug_name = root, str(root) + else: + expr2, debug_name = get_expression(root), self._debug_name(root) expr2_ = fn(expr2) # Check whether given input shape values satisfy a specified equation s = fn(s'). # - Raise when the equation was violated by the given input shape values. @@ -5446,10 +5447,11 @@ def get_expression(tensor_dim_src: Source) -> sympy.Expr: ) for phantom_symbol in equalities_inputs.phantom_symbols: - # we created additional phantom symbols that are not input shape dimensions - symbol_to_source[phantom_symbol].extend( - self.var_to_sources[phantom_symbol] - ) + if isinstance(phantom_symbol, sympy.Symbol): + # we created additional phantom symbols that are not input shape dimensions + symbol_to_source[phantom_symbol].extend( + self.var_to_sources[phantom_symbol] + ) # How do we know what the value of s0 is? 
Fresh variables can only be # bound by inputs, so there MUST be some other input which binds the From 2f533959430c2a41fe16ef79fe4d680a5c4e0585 Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Thu, 11 Sep 2025 16:45:41 +0000 Subject: [PATCH 109/693] [ez][CI] Fix docs push in nightly workflow (#162657) HUD metrics page says the docs push hasn't happened in 21 days. I guess main branch docs just haven't been updated? Did anyone notice? Do we care? Either way I think this should fix it. Likely started after https://github.com/pytorch/pytorch/pull/161182 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162657 Approved by: https://github.com/huydhn --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 65b8781be7585..696c5b68b475b 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -54,7 +54,7 @@ jobs: - get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.9-gcc11 + build-environment: linux-jammy-py3.10-gcc11 docker-image: ${{ needs.docs-build.outputs.docker-image }} push: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || startsWith(github.event.ref, 'refs/tags/v') }} run-doxygen: true From 463fbc8ca0537e5635236190d2ca38ce6fcef831 Mon Sep 17 00:00:00 2001 From: Tugsbayasgalan Manlaibaatar Date: Wed, 10 Sep 2025 20:56:53 -0700 Subject: [PATCH 110/693] Support vmap + custom autograd function/improve DTensor constructor inefficiency (#162240) This makes gemma3 exportable on transformers=4.55.4. In HF, there is a torch function mode called TransformGetItemToIndex which internally calls a custom autograd function. When this custom autograd function is called under vmap, it triggers CustomFunctionHigherOrderOP, which errored because there was no pre-dispatch proxy mode implementation. Since there have been a number of requests lately to add various operators to the pre-dispatch IR, I introduce a decorator in export that works similarly to `allow_in_graph`. Basically: 1) We intercept custom_autograd_function.apply at pre-dispatch mode when this decorator is applied 2) We apply the `flat_apply` HOP to hide the pytree spec for this autograd function. Note that this adds a restriction that this custom autograd function needs to take in fx-able types. 3) The subclass constructor decorator is implemented similarly, so we just refactor it to use the same implementation as this new decorator. Eventually we should delete the subclass constructor decorator.
4) Move some code in subclass constructor decorator to exit early in non-export environment which should shave off some inefficiency (around 1% according to @swolchok 's benchmark) Fixes: https://github.com/pytorch/pytorch/issues/161563#issuecomment-3246309758 Differential Revision: [D82141316](https://our.internmc.facebook.com/intern/diff/D82141316) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162240 Approved by: https://github.com/ydwu4 --- test/export/test_export.py | 52 +++++- .../_dynamo/_trace_wrapped_higher_order_op.py | 5 + torch/_export/verifier.py | 1 + torch/_export/wrappers.py | 174 ++++++++++++++---- torch/export/custom_ops.py | 23 +++ 5 files changed, 212 insertions(+), 43 deletions(-) diff --git a/test/export/test_export.py b/test/export/test_export.py index bdcc63034abdd..c35cd8fee3852 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -26,6 +26,7 @@ from functorch.experimental.control_flow import cond, map from torch import Tensor from torch._decomp import decomposition_table, get_decompositions +from torch._dynamo._trace_wrapped_higher_order_op import mod_index from torch._dynamo.test_case import TestCase from torch._dynamo.testing import normalize_gm from torch._export.pass_base import _ExportPassBaseDeprecatedDoNotUse @@ -13615,6 +13616,52 @@ def forward(self, x): ): _ = export(Foo(), (torch.randn(4, 4),), strict=False) + def test_vmap_custom_autograd_function(self): + from torch._dynamo._trace_wrapped_higher_order_op import TransformGetItemToIndex + + class IndexingModule(torch.nn.Module): + def __init__(self, base_size: int = 10): + super().__init__() + self.register_buffer("base", torch.arange(base_size)) + + def forward(self, indices: torch.Tensor) -> torch.Tensor: + with TransformGetItemToIndex(): + # Each element of `indices` is a scalar tensor, so our override kicks in + return torch.vmap(lambda i: self.base[i])(indices) + + m = IndexingModule(10) + idxs = torch.tensor([0, 3, 7, 9]) + ep = torch.export.export(m, (idxs,), strict=False) + self.assertExpectedInline( + ep.graph, + """\ +graph(): + %b_base : [num_users=1] = placeholder[target=b_base] + %indices : [num_users=1] = placeholder[target=indices] + %lazy_load_decompositions : [num_users=0] = call_function[target=torch._functorch.predispatch.lazy_load_decompositions](args = (), kwargs = {}) + %_vmap_increment_nesting : [num_users=0] = call_function[target=torch._functorch.predispatch._vmap_increment_nesting](args = (4, error), kwargs = {}) + %_add_batch_dim : [num_users=1] = call_function[target=torch._functorch.predispatch._add_batch_dim](args = (%indices, 0, 1), kwargs = {}) + %torch__dynamo__trace_wrapped_higher_order_op_mod_index0 : [num_users=1] = get_attr[target=torch__dynamo__trace_wrapped_higher_order_op_ModIndex0] + %function_const_func_spec0 : [num_users=1] = get_attr[target=function_const_func_spec0] + %flat_apply : [num_users=1] = call_function[target=torch.ops.higher_order.flat_apply](args = (%function_const_func_spec0, %torch__dynamo__trace_wrapped_higher_order_op_mod_index0, torch._dynamo._trace_wrapped_higher_order_op.ModIndex, %b_base, %_add_batch_dim), kwargs = {}) + %_remove_batch_dim : [num_users=1] = call_function[target=torch._functorch.predispatch._remove_batch_dim](args = (%flat_apply, 1, 4, 0), kwargs = {}) + %_vmap_decrement_nesting : [num_users=0] = call_function[target=torch._functorch.predispatch._vmap_decrement_nesting](args = (), kwargs = {}) + return (_remove_batch_dim,)""", + ) + + self.assertEqual(m(idxs), ep.module()(idxs)) + ep = 
ep.run_decompositions({}) + self.assertExpectedInline( + ep.graph, + """\ +graph(): + %b_base : [num_users=1] = placeholder[target=b_base] + %indices : [num_users=1] = placeholder[target=indices] + %index : [num_users=1] = call_function[target=torch.ops.aten.index.Tensor](args = (%b_base, [%indices]), kwargs = {}) + return (index,)""", + ) + self.assertEqual(m(idxs), ep.module()(idxs)) + def test_unbacked_deferred_runtime_retrace(self): class Foo(torch.nn.Module): def forward(self, x, y): @@ -14412,10 +14459,7 @@ def __init__(self): def forward(self, x): return x.cos() - with self.assertRaisesRegex( - RuntimeError, "TestExport.test_capture_subclass_wrong..Foo" - ): - export(Foo(), (torch.randn(4, 4),)) + export(Foo(), (torch.randn(4, 4),)) def test_capture_subclass_constructor_torch_ir(self): class Foo(torch.nn.Module): diff --git a/torch/_dynamo/_trace_wrapped_higher_order_op.py b/torch/_dynamo/_trace_wrapped_higher_order_op.py index 17b664fc5e0ed..9b000ee926a1b 100644 --- a/torch/_dynamo/_trace_wrapped_higher_order_op.py +++ b/torch/_dynamo/_trace_wrapped_higher_order_op.py @@ -116,6 +116,11 @@ def backward(ctx, gradOut): # type: ignore[no-untyped-def] None, ) + @classmethod + @torch._export.wrappers.allow_in_pre_dispatch_graph + def apply(cls, *args, **kwargs): # type: ignore[no-untyped-def] + return super().apply(*args, **kwargs) + mod_index = ModIndex.apply diff --git a/torch/_export/verifier.py b/torch/_export/verifier.py index 58c0f1771a1ee..28593291b22cc 100644 --- a/torch/_export/verifier.py +++ b/torch/_export/verifier.py @@ -216,6 +216,7 @@ def _allowed_op_types() -> tuple[type[Any], ...]: torch.sym_not, torch.sym_sqrt, torch.sym_sum, + torch.export.custom_ops._call_custom_autograd_function_in_pre_dispatch, # TODO (tmanlaibaatar) # Predispatch export is able to contain autograd ops. # These will be modeled as HOO later diff --git a/torch/_export/wrappers.py b/torch/_export/wrappers.py index b851847bada81..e023169403937 100644 --- a/torch/_export/wrappers.py +++ b/torch/_export/wrappers.py @@ -1,5 +1,7 @@ # mypy: allow-untyped-defs +import inspect from contextlib import contextmanager +from functools import wraps import torch import torch._custom_ops @@ -15,7 +17,6 @@ from torch._ops import HigherOrderOperator from torch._subclasses.fake_tensor import FakeTensorMode from torch.fx.experimental.proxy_tensor import ( - get_proxy_slot, PreDispatchTorchFunctionMode, ProxyTorchDispatchMode, track_tensor_tree, @@ -129,7 +130,7 @@ def call(self, *args): return cls -def _register_subclass_spec_proxy_in_tracer(tracer, name, spec): +def _register_func_spec_proxy_in_tracer(tracer, name, spec): """ This is a wrapper utility method on top of tracer to cache the already registered subclass spec attribute. 
This is useful because @@ -146,6 +147,41 @@ def _register_subclass_spec_proxy_in_tracer(tracer, name, spec): return tracer.create_proxy("get_attr", qualname, (), {}) +def _emit_flat_apply_call( + *, + tracer, + spec_name: str, + const_target_for_apply, + graphable_args, + track_value, + call_spec_cache_key: str, +): + # Flatten to graphable form and record the spec on the FX root + flat_args, in_spec = to_graphable(graphable_args) + qualname = tracer.get_fresh_qualname(spec_name) # type: ignore[union-attr] + setattr(tracer.root, qualname, in_spec) # type: ignore[union-attr] + spec_proxy = tracer.create_proxy("get_attr", qualname, (), {}) + + # Reuse/cached ConstantFunction spec on the root + _, func_spec = pytree.tree_flatten(_ConstantFunction(const_target_for_apply)) + func_spec_proxy = _register_func_spec_proxy_in_tracer( + tracer, f"{call_spec_cache_key}_const_func_spec", func_spec + ) + + # Map runtime args -> proxies (always via tracer.unwrap_proxy now) + flat_proxy_args = pytree.tree_map(tracer.unwrap_proxy, flat_args) + + # Emit flat_apply and track result structure + out_proxy = tracer.create_proxy( + "call_function", flat_apply, (func_spec_proxy, spec_proxy, *flat_proxy_args), {} + ) + track_tensor_tree(track_value, out_proxy, constant=None, tracer=tracer) + + +def _is_init(fn): + return callable(fn) and fn.__name__ == "__init__" + + def mark_subclass_constructor_exportable_experimental(constructor_subclass): """ Experimental decorator that makes subclass to be traceable in export @@ -167,10 +203,6 @@ def __new__(cls, elem, *, requires_grad=False): def __init__(self, elem, ...): # ... """ - - def _is_init(fn): - return callable(fn) and fn.__name__ == "__init__" - if not _is_init(constructor_subclass): raise RuntimeError( f"torch._export.wrappers.mark_constructor_exportable_experimental can only be applied on subclass tensor.__init__" @@ -179,14 +211,18 @@ def _is_init(fn): ) def wrapper(*args, **kwargs): + constructor_subclass(*args, **kwargs) + + if not torch.compiler.is_exporting(): + return + if not is_traceable_wrapper_subclass_type(type(args[0])): assert constructor_subclass.__qualname__.endswith("__init__") obj_name = constructor_subclass.__qualname__[: -len("__init__")] raise RuntimeError( - f"Applying mark_constructor_exportable_experimental on {obj_name} is not valid as it is not a traceable " + f"Can't intercept {obj_name} in export because this object is not a traceable " f"tensor subclass. Please look at DTensor.__init__ implementation as an example of proper usage of this API." 
) - constructor_subclass(*args, **kwargs) mode = _maybe_find_pre_dispatch_tf_mode_for_export() if mode is None: @@ -196,46 +232,106 @@ def wrapper(*args, **kwargs): tracer = mode.tracer subclass = args[0] + graphable = (tuple(args[1:]), kwargs) + + spec_name = "_".join(constructor_subclass.__qualname__.lower().split(".")) + call_spec_cache_key = type(subclass).__name__.lower() + + _emit_flat_apply_call( + tracer=tracer, + spec_name=spec_name, + const_target_for_apply=type(subclass), + graphable_args=graphable, + track_value=subclass, # track the constructed subclass instance + call_spec_cache_key=call_spec_cache_key, + ) + return - flat_args, in_spec = to_graphable((tuple(args[1:]), kwargs)) + return wrapper - constructor_spec_name = "_".join( - constructor_subclass.__qualname__.lower().split(".") - ) - qualname = tracer.get_fresh_qualname(constructor_spec_name) # type: ignore[union-attr] - setattr(tracer.root, qualname, in_spec) # type: ignore[union-attr] - spec_proxy = tracer.create_proxy("get_attr", qualname, (), {}) - flat_proxy_args = pytree.tree_map_only( - torch.Tensor, lambda x: get_proxy_slot(x, tracer).proxy, flat_args - ) - _, func_spec = torch.utils._pytree.tree_flatten( - _ConstantFunction(type(subclass)) +def allow_in_pre_dispatch_graph(func): + """ + Experimental decorator that adds user function to export pre-dispatch graph. Note that + we only support custom autograd function/subclass constructors today. To use this function: + 1. For subclasses: + 1. refer to instructions in mark_subclass_constructor_exportable_experimental + 2. Define apply method on your custom autograd function and apply this decorator. + + Example: + + class MyCoolCustomAutogradFunc(autograd.Function): + @classmethod + @torch._export.wrappers.allow_in_pre_dispatch_graph + def apply(cls, *args, **kwargs): + return super(MyCoolCustomAutogradFunc, cls).apply(*args, **kwargs) + + """ + if _is_init(func): + return mark_subclass_constructor_exportable_experimental(func) + + if not (_is_init(func) or func.__name__ == "apply"): + raise RuntimeError( + f"torch._export.wrappers.allow_in_pre_dispatch_graph can only be applied on subclass tensor.__init_ " + f"or custom_autograd_function.apply. " + f"But, you are adding it on {func.__name__} which is not supported. " + f"If __init__ doesn't exist on your subclass, please add it. Look at DTensor.__init__ implementation for example. " + f"If you are adding it on custom autograd function, please add it on apply method. " + f"If anything else, file an issue on github and we may consider extending our support. " ) - # We actually don't want to create a new spec for each instance - # In fx graph, it will look like dtensor_const_func_spec - # We can't directly shove DTensor.__init__ into fx as it is not - # allowed type. - fxable_constructor_call_spec_name = ( - type(subclass).__name__.lower() + "_const_func_spec" + @wraps(func) + def wrapper(*args, **kwargs): + if not torch.compiler.is_exporting(): + return func(*args, **kwargs) + + if not inspect.isclass(args[0]): + return func(*args, **kwargs) + + if not issubclass(args[0], torch.autograd.Function): + return func(*args, **kwargs) + + from torch._ops import _get_dispatch_mode_pre_dispatch + + mode = _get_dispatch_mode_pre_dispatch(torch._C._TorchDispatchModeKey.PROXY) + if mode is None: + return func(*args, **kwargs) + + # Sometimes custom autograd functions can call into HOPs that don't have proxy impl + # at PreDispatch level, so we just dispatch it below to get the concrete result. 
+ include_to_set = torch._C._dispatch_tls_local_include_set().remove( + torch._C.DispatchKey.PreDispatch + ) + exclude_to_set = ( + torch._C._dispatch_tls_local_exclude_set() + | torch._C.DispatchKeySet(torch._C.DispatchKey.PreDispatch) ) - # We should try to reuse the constructor call spec as it is guaranteed to be same - # for each subclass type. This is different from proxy-ing the init arguments which - # can't be reused because for example, DTensor can receive different DeviceMesh etc - # as it's arguments - func_spec_proxy = _register_subclass_spec_proxy_in_tracer( - tracer, fxable_constructor_call_spec_name, func_spec + with torch._C._ForceDispatchKeyGuard(include_to_set, exclude_to_set): + out = func(*args, **kwargs) + + assert mode.pre_dispatch, "Should only do this in predispatch" + tracer = mode.tracer + + function_cls_name = f"{args[0].__module__}.{args[0].__qualname__}" + graphable = ((function_cls_name, *args[1:]), kwargs) + + from torch.export.custom_ops import ( + _call_custom_autograd_function_in_pre_dispatch, ) - inner_proxy = tracer.create_proxy( - "call_function", - flat_apply, - (func_spec_proxy, spec_proxy, *flat_proxy_args), - {}, + spec_name = "_".join(function_cls_name.split(".")) + call_spec_cache_key = type( + _call_custom_autograd_function_in_pre_dispatch + ).__name__.lower() + _emit_flat_apply_call( + tracer=tracer, + spec_name=spec_name, + const_target_for_apply=_call_custom_autograd_function_in_pre_dispatch, + graphable_args=graphable, + track_value=out, + call_spec_cache_key=call_spec_cache_key, ) - track_tensor_tree(subclass, inner_proxy, constant=None, tracer=tracer) - return + return out return wrapper diff --git a/torch/export/custom_ops.py b/torch/export/custom_ops.py index 57288fa344c10..9df7988da9314 100644 --- a/torch/export/custom_ops.py +++ b/torch/export/custom_ops.py @@ -1,3 +1,6 @@ +# mypy: allow-untyped-defs +import importlib + import torch @@ -24,3 +27,23 @@ def _access_subclass_inner_tensor( f"Attribute {attr} is not a tensor or doesn't exist in {src_subclass_tensor}" ) return val + + +def _call_custom_autograd_function_in_pre_dispatch(function_cls_name, *args, **kwargs): + """ + Import a custom autograd function by string name and call it. This is pretty bad + because: + 1) There is no schema + + Ideally we should automatically wrap custom autograd functions with a custom op, but + that is too much work because we need to schematize custom autograd functions. For now, + we just hackily put it in the IR. + """ + # Parse module and class name + module_name, class_name = function_cls_name.rsplit(".", 1) + + # Import the module and get the class + module = importlib.import_module(module_name) + function_cls = getattr(module, class_name) + assert hasattr(function_cls, "apply") + return function_cls.apply(*args, **kwargs) From 43d9b5ecaa68c19422c5697165b2277bd67572ba Mon Sep 17 00:00:00 2001 From: justinchuby <11205048+justinchuby@users.noreply.github.com> Date: Thu, 11 Sep 2025 18:09:58 +0000 Subject: [PATCH 111/693] [ONNX] Set fallback=False by default (#162726) This change addresses confusing error messages users encounter when using the ONNX exporter with default settings. Previously, `fallback=True` was the default, which would attempt to fall back to the TorchScript exporter when the dynamo path failed, leading to mixed error messages that obscured the actual issues. 
## Problem When `fallback=True` by default: - Users get confusing error messages mixing dynamo and TorchScript export failures - Error messages tell users to provide the `f` argument unnecessarily - Dynamo error messages get flushed with TorchScript errors when both paths fail - Users expecting the dynamo path get unexpected fallback behavior ## Solution Changed the default from `fallback=True` to `fallback=False` in both: - `torch.onnx.export()` function - `torch.onnx._internal.exporter._compat.export_compat()` function ## Impact **Before:** ```python # Would fallback to TorchScript on dynamo failure, causing mixed error messages torch.onnx.export(model, args) ``` **After:** ```python # Clean dynamo-only errors by default torch.onnx.export(model, args) # Advanced users can still opt-in to fallback behavior torch.onnx.export(model, args, fallback=True) ``` Fixes #162697 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162726 Approved by: https://github.com/titaiwangms, https://github.com/xadupre --- torch/onnx/__init__.py | 2 +- torch/onnx/_internal/exporter/_compat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torch/onnx/__init__.py b/torch/onnx/__init__.py index 10d0ba23463de..668f47c15bc82 100644 --- a/torch/onnx/__init__.py +++ b/torch/onnx/__init__.py @@ -81,7 +81,7 @@ def export( profile: bool = False, dump_exported_program: bool = False, artifacts_dir: str | os.PathLike = ".", - fallback: bool = True, + fallback: bool = False, # BC options export_params: bool = True, keep_initializers_as_inputs: bool = False, diff --git a/torch/onnx/_internal/exporter/_compat.py b/torch/onnx/_internal/exporter/_compat.py index fe18f42e17b92..0bc0c6182fca0 100644 --- a/torch/onnx/_internal/exporter/_compat.py +++ b/torch/onnx/_internal/exporter/_compat.py @@ -67,7 +67,7 @@ def export_compat( profile: bool = False, dump_exported_program: bool = False, artifacts_dir: str | os.PathLike = ".", - fallback: bool = True, + fallback: bool = False, # Legacy export parameters for fallback legacy_export_kwargs: dict[str, Any] | None = None, ) -> _onnx_program.ONNXProgram: From 799471d92b15c433a0a92e5c4cfb11350841b123 Mon Sep 17 00:00:00 2001 From: David Berard Date: Thu, 11 Sep 2025 09:28:16 -0700 Subject: [PATCH 112/693] [triton] Update 3.5 pin (AMD compilation fix + warp spec) (#162733) Fixes #162390 Also adds warp spec (thanks @manman-ren!) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162733 Approved by: https://github.com/atalman --- .ci/docker/ci_commit_pins/triton.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/ci_commit_pins/triton.txt b/.ci/docker/ci_commit_pins/triton.txt index f313c2efae473..1269a7801411f 100644 --- a/.ci/docker/ci_commit_pins/triton.txt +++ b/.ci/docker/ci_commit_pins/triton.txt @@ -1 +1 @@ -fccfc522864cf8bc172abe0cd58ae5581e2d44b9 +70cbcaca84471df49e81ddc56873c9241b671f8d From 9bc648235d1112907706d5dafff59289d3d6658b Mon Sep 17 00:00:00 2001 From: Isalia20 Date: Thu, 11 Sep 2025 18:36:24 +0000 Subject: [PATCH 113/693] [MPS] mps sparse mul op implementation (#162349) Implements mps sparse mul operation as well as enables other operations such as: 1. copy_ 2. div 3. sum 4. floor 5. power 6. sub 7. 
floor_divide Pull Request resolved: https://github.com/pytorch/pytorch/pull/162349 Approved by: https://github.com/pearu, https://github.com/malfet Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com> --- aten/src/ATen/native/native_functions.yaml | 39 +-- .../native/sparse/mps/SparseMPSTensorMath.mm | 260 +++++++++++++++++- .../ATen/native/sparse/mps/kernels/Mul.metal | 150 ++++++++++ test/test_sparse.py | 18 +- 4 files changed, 436 insertions(+), 31 deletions(-) create mode 100644 aten/src/ATen/native/sparse/mps/kernels/Mul.metal diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index abb061afc5c95..65ed5f402c2a3 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -1798,7 +1798,7 @@ device_guard: False dispatch: MkldnnCPU: copy_mkldnn_ - SparseCPU, SparseCUDA: copy_sparse_wrapper_ + SparseCPU, SparseCUDA, SparseMPS: copy_sparse_wrapper_ CompositeExplicitAutograd: copy_ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: copy_sparse_compressed_ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: copy_nested_ @@ -2160,7 +2160,7 @@ variants: function, method structured_delegate: div.out dispatch: - SparseCPU, SparseCUDA: div_sparse + SparseCPU, SparseCUDA, SparseMPS: div_sparse ZeroTensor: div_zerotensor NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_div_Tensor tags: [core, pointwise] @@ -2170,7 +2170,7 @@ variants: method structured_delegate: div.out dispatch: - SparseCPU, SparseCUDA: div_sparse_ + SparseCPU, SparseCUDA, SparseMPS: div_sparse_ tags: pointwise - func: div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) @@ -2179,7 +2179,7 @@ structured_inherits: TensorIteratorBase dispatch: CPU, CUDA, MPS, MTIA: div_out - SparseCPU, SparseCUDA: div_out_sparse_zerodim + SparseCPU, SparseCUDA, SparseMPS: div_out_sparse_zerodim tags: pointwise - func: div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor @@ -2187,7 +2187,7 @@ variants: function, method structured_delegate: div.out_mode dispatch: - SparseCPU, SparseCUDA: div_sparse + SparseCPU, SparseCUDA, SparseMPS: div_sparse tags: [core, pointwise] - func: div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!) @@ -2195,7 +2195,7 @@ variants: method structured_delegate: div.out_mode dispatch: - SparseCPU, SparseCUDA: div_sparse_ + SparseCPU, SparseCUDA, SparseMPS: div_sparse_ tags: pointwise - func: div.out_mode(Tensor self, Tensor other, *, str? rounding_mode, Tensor(a!) out) -> Tensor(a!) @@ -2204,7 +2204,7 @@ structured_inherits: TensorIteratorBase dispatch: CPU, CUDA, MPS: div_out_mode - SparseCPU, SparseCUDA: div_out_sparse_zerodim + SparseCPU, SparseCUDA, SparseMPS: div_out_sparse_zerodim tags: pointwise # For C++ only, until we have conversion from C++ numbers to Tensor @@ -2768,20 +2768,20 @@ variants: function, method dispatch: CPU, CUDA, MPS, MTIA: floor_divide - SparseCPU, SparseCUDA: floor_divide_sparse + SparseCPU, SparseCUDA, SparseMPS: floor_divide_sparse - func: floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) device_check: NoCheck # TensorIterator variants: method dispatch: CPU, CUDA, MPS: floor_divide_ - SparseCPU, SparseCUDA: floor_divide_sparse_ + SparseCPU, SparseCUDA, SparseMPS: floor_divide_sparse_ - func: floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) 
device_check: NoCheck # TensorIterator dispatch: CPU, CUDA, MPS: floor_divide_out - SparseCPU, SparseCUDA: floor_divide_out_sparse_zerodim + SparseCPU, SparseCUDA, SparseMPS: floor_divide_out_sparse_zerodim - func: floor_divide.Scalar(Tensor self, Scalar other) -> Tensor device_check: NoCheck # TensorIterator @@ -4273,7 +4273,7 @@ structured_delegate: mul.out variants: function, method dispatch: - SparseCPU, SparseCUDA: mul_sparse + SparseCPU, SparseCUDA, SparseMPS: mul_sparse SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr MkldnnCPU: mkldnn_mul ZeroTensor: mul_zerotensor @@ -4285,7 +4285,7 @@ structured_delegate: mul.out variants: method dispatch: - SparseCPU, SparseCUDA: mul_sparse_ + SparseCPU, SparseCUDA, SparseMPS: mul_sparse_ SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_sparse_csr_ MkldnnCPU: mkldnn_mul_ NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_mul__Tensor @@ -4299,6 +4299,7 @@ CPU, CUDA, MPS, MTIA: mul_out SparseCPU: mul_out_sparse_cpu SparseCUDA: mul_out_sparse_cuda + SparseMPS: mul_out_sparse_mps SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: mul_out_sparse_csr MkldnnCPU: mkldnn_mul_out tags: pointwise @@ -5848,7 +5849,7 @@ variants: function, method dispatch: CompositeExplicitAutograd: sum - SparseCPU, SparseCUDA, SparseMeta: sum_coo + SparseCPU, SparseCUDA, SparseMPS, SparseMeta: sum_coo SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_csr autogen: sum.out @@ -5859,7 +5860,7 @@ variants: function, method dispatch: NestedTensorCPU: NestedTensor_sum_dim_CPU - SparseCPU, SparseCUDA: sum_sparse_coo + SparseCPU, SparseCUDA, SparseMPS: sum_sparse_coo SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: sum_sparse_compressed tags: core @@ -6975,7 +6976,7 @@ CPU, CUDA: sub_out MPS: sub_out_mps MTIA: sub_out_mtia - SparseCPU, SparseCUDA: sub_out_sparse + SparseCPU, SparseCUDA, SparseMPS: sub_out_sparse tags: pointwise - func: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor @@ -6983,7 +6984,7 @@ variants: function, method structured_delegate: sub.out dispatch: - SparseCPU, SparseCUDA: sub_sparse + SparseCPU, SparseCUDA, SparseMPS: sub_sparse ZeroTensor: sub_zerotensor NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: NestedTensor_sub_Tensor tags: [core, pointwise] @@ -6993,7 +6994,7 @@ variants: method structured_delegate: sub.out dispatch: - SparseCPU, SparseCUDA: sub_sparse_ + SparseCPU, SparseCUDA, SparseMPS: sub_sparse_ tags: pointwise # For C++ only, until we have conversion from C++ numbers to Tensor @@ -10342,7 +10343,7 @@ structured_inherits: TensorIteratorBase dispatch: CPU, CUDA: pow_Tensor_Scalar_out - SparseCPU, SparseCUDA: pow_out_sparse_scalar + SparseCPU, SparseCUDA, SparseMPS: pow_out_sparse_scalar MPS: pow_tensor_scalar_out_mps tags: pointwise @@ -10351,7 +10352,7 @@ structured_delegate: pow.Tensor_Scalar_out variants: function, method dispatch: - SparseCPU, SparseCUDA: pow_sparse_scalar + SparseCPU, SparseCUDA, SparseMPS: pow_sparse_scalar tags: [core, pointwise] - func: pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!) 
diff --git a/aten/src/ATen/native/sparse/mps/SparseMPSTensorMath.mm b/aten/src/ATen/native/sparse/mps/SparseMPSTensorMath.mm index 07ee2e097b49e..589d000ab3187 100644 --- a/aten/src/ATen/native/sparse/mps/SparseMPSTensorMath.mm +++ b/aten/src/ATen/native/sparse/mps/SparseMPSTensorMath.mm @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -20,10 +21,265 @@ namespace at::native { using namespace at::sparse; +using namespace mps; -Tensor& add_out_dense_sparse_mps(Tensor& out, const Tensor& dense, const SparseTensor& sparse, const Scalar& alpha); +#ifndef PYTORCH_JIT_COMPILE_SHADERS +static auto& lib = MetalShaderLibrary::getBundledLibrary(); +#else +#include +#endif + +static SparseTensor& mul_out_dense_sparse_mps( + const Tensor& dense, + const Tensor& sparse, + SparseTensor& out) { + + TORCH_CHECK(sparse.is_sparse(), "mul: expected 'sparse' to be sparse COO"); + TORCH_CHECK(sparse.is_mps(), "mul: expected 'sparse' to be MPS, got ", sparse.device()); + TORCH_CHECK(out.is_mps(), "mul: expected 'out' to be MPS, got ", out.device()); + + const bool scalar_like = (dense.dim() == 0) || (dense.numel() == 1); + TORCH_CHECK(dense.is_mps() || scalar_like, + "mul: expected 'dense' to be MPS or scalar-like, got ", dense.device()); + + const int64_t nnz = sparse._nnz(); + out.resize_as_(sparse); + + auto commonDtype = at::result_type(dense, sparse); + TORCH_CHECK(canCast(commonDtype, out.scalar_type()), + "Can't convert result type ", commonDtype, " to output ", out.scalar_type()); + + auto indices = sparse._indices().contiguous(); + auto values = sparse._values().to(commonDtype).contiguous(); + + if (nnz == 0) { + auto empty_vals = values.narrow(0, 0, 0); + alias_into_sparse(out, + indices.narrow(1, 0, 0), + (out.scalar_type() == commonDtype) ? 
empty_vals + : empty_vals.to(out.scalar_type())); + out._coalesced_(sparse.is_coalesced()); + return out; + } + + if (scalar_like) { + auto scalar = dense; + if (dense.numel() == 1 && dense.dim() > 0) { + scalar = dense.view({}); + } + scalar = scalar.to(values.options()); + auto out_vals = values.mul(scalar); + if (out.scalar_type() != commonDtype) { + out_vals = out_vals.to(out.scalar_type()); + } + + alias_into_sparse(out, indices, out_vals); + out._coalesced_(sparse.is_coalesced()); + return out; + } + + TORCH_CHECK(dense.sizes().equals(sparse.sizes()), + "mul(dense, sparse): sizes must match exactly (no broadcasting): ", + dense.sizes(), " vs ", sparse.sizes()); + + const int64_t ndim_i = sparse.sparse_dim(); + const int64_t ndim = dense.dim(); + TORCH_CHECK( + ndim_i <= ndim, + "mul(dense, sparse): sparse_dim=", ndim_i, " exceeds dense.dim()=", ndim); + + // Prepare shapes + int64_t view_rows = 1, view_cols = 1; + for (int64_t i = 0; i < ndim_i; ++i) view_rows *= sparse.size(i); + for (int64_t i = ndim_i; i < ndim; ++i) view_cols *= sparse.size(i); + + auto dense_mps = dense.to(commonDtype).contiguous().reshape({view_rows, view_cols}); + auto out_vals = at::empty_like(values, values.options()); + + const uint32_t u_view_cols = static_cast(view_cols); + const uint32_t u_nnz = static_cast(nnz); + const uint32_t u_ndim_i = static_cast(ndim_i); + + auto stream = getCurrentMPSStream(); + dispatch_sync_with_rethrow(stream->queue(), ^() { + @autoreleasepool { + auto pso = lib.getPipelineStateForFunc("dense_sparse_mul_kernel_" + mps::scalarToMetalTypeString(values)); + auto computeEncoder = stream->commandEncoder(); + [computeEncoder setComputePipelineState:pso]; + + const uint32_t gridWidth = u_view_cols; + const uint32_t gridDepth = u_nnz; + MTLSize gridSize = MTLSizeMake(gridWidth, 1, gridDepth); + + const uint32_t maxThreadsPerGroup = pso.maxTotalThreadsPerThreadgroup; + const uint32_t tew = pso.threadExecutionWidth; + uint32_t tgWidth = std::min(gridWidth, tew); + MTLSize threadgroupSize = MTLSizeMake(tgWidth, 1, 1); + + mtl_setArgs( + computeEncoder, + dense_mps, + values, + out_vals, + indices, + sparse.sizes(), + std::array{u_nnz, u_ndim_i, u_view_cols} + ); + + [computeEncoder dispatchThreads:gridSize threadsPerThreadgroup:threadgroupSize]; + } + }); + + Tensor final_vals = out_vals; + if (out.scalar_type() != commonDtype) { + final_vals = final_vals.to(out.scalar_type()); + } + + alias_into_sparse(out, indices, final_vals); + out._coalesced_(sparse.is_coalesced()); + return out; +} + + +SparseTensor& mul_out_sparse_mps(const Tensor& t_, const Tensor& src_, SparseTensor& r_) { + TORCH_CHECK(r_.is_mps(), "mul: expected 'out' to be MPS, but got ", r_.device()); + + // Dense x sparse fallback (keep dense first) + if (!t_.is_sparse() || !src_.is_sparse()) { + const Tensor& dense = t_.is_sparse() ? src_ : t_; + const Tensor& sparse = t_.is_sparse() ? 
t_ : src_; + return mul_out_dense_sparse_mps(dense, sparse, r_); + } + + TORCH_CHECK(t_.is_mps(), "mul: expected 'self' to be MPS, but got ", t_.device()); + TORCH_CHECK(src_.is_mps(), "mul: expected 'other' to be MPS, but got ", src_.device()); + TORCH_CHECK(t_.sparse_dim() == src_.sparse_dim(), + "mul(sparse, sparse): must have same sparse_dim, got ", + t_.sparse_dim(), " vs ", src_.sparse_dim()); + TORCH_CHECK(t_.sizes().equals(src_.sizes()), + "mul(sparse, sparse): sizes must match exactly (no broadcasting)."); + + // Coalesce and early-exit on structurally empty operands + auto lhs = t_.coalesce(); + auto rhs = src_.coalesce(); + const int64_t lhs_nnz = lhs._nnz(); + const int64_t rhs_nnz = rhs._nnz(); + if (!lhs_nnz || !rhs_nnz) { + r_.resize_as_(lhs); + return r_.zero_(); + } + + // dtype checks and promotion + auto commonDtype = at::result_type(lhs, rhs); + TORCH_CHECK(canCast(commonDtype, r_.scalar_type()), + "Can't convert result type ", commonDtype, " to output ", r_.scalar_type()); + + const int64_t ndim_i = lhs.sparse_dim(); + + // ndim_i == 0, at most one structural entry + if (ndim_i == 0) { + r_.resize_as_(lhs); + const bool has = (lhs_nnz && rhs_nnz); + + auto out_indices = lhs._indices().narrow(1, 0, has ? 1 : 0); + + Tensor lhs_vals = lhs._values().to(commonDtype); + Tensor rhs_vals = rhs._values().to(commonDtype); + lhs_vals = lhs_vals.narrow(0, 0, has ? 1 : 0); + rhs_vals = rhs_vals.narrow(0, 0, has ? 1 : 0); + + Tensor out_values = lhs_vals.mul(rhs_vals); + if (r_.scalar_type() != commonDtype) { + out_values = out_values.to(r_.scalar_type()); + } + + alias_into_sparse(r_, out_indices, out_values); + r_._coalesced_(true); + return r_; + } + + // General path, intersect keys, then gather + multiply on GPU + const auto device = r_.device(); + auto stream = getCurrentMPSStream(); + + auto lhs_indices = lhs._indices(); + auto rhs_indices = rhs._indices(); + auto lhs_values = lhs._values().to(commonDtype); + auto rhs_values = rhs._values().to(commonDtype); + + // Flatten sparse indices to keys + auto lhs_keys = flatten_indices(lhs_indices, lhs.sizes()); + auto rhs_keys = flatten_indices(rhs_indices, rhs.sizes()); + + // Intersect sorted keys (search the shorter in the longer) + const bool A_is_lhs = (lhs_nnz <= rhs_nnz); + const int64_t lenA = A_is_lhs ? lhs_nnz : rhs_nnz; + const int64_t lenB = A_is_lhs ? rhs_nnz : lhs_nnz; + auto A_keys = A_is_lhs ? lhs_keys : rhs_keys; + auto B_keys = A_is_lhs ? 
rhs_keys : lhs_keys; + + auto outA_idx = at::empty({lenA}, at::device(device).dtype(kLong)); + auto outB_idx = at::empty({lenA}, at::device(device).dtype(kLong)); + auto counter = at::zeros({1}, at::device(device).dtype(kInt)); + + dispatch_sync_with_rethrow(stream->queue(), ^() { + @autoreleasepool { + auto pso = lib.getPipelineStateForFunc("intersect_binary_search"); + auto enc = stream->commandEncoder(); + [enc setComputePipelineState:pso]; + mtl_setArgs(enc, A_keys, B_keys, outA_idx, outB_idx, counter, + static_cast(lenB), A_is_lhs); + mtl_dispatch1DJob(enc, pso, static_cast(lenA)); + } + }); + + const uint32_t M = counter.item(); // number of structural matches + + r_.resize_as_(lhs); + + auto out_indices = at::empty({ndim_i, static_cast(M)}, at::device(device).dtype(at::kLong)); + auto lhs_match = outA_idx.narrow(0, 0, M); + auto rhs_match = outB_idx.narrow(0, 0, M); + auto out_val_sizes = lhs_values.sizes().vec(); + out_val_sizes[0] = static_cast(M); + auto out_values = at::empty(out_val_sizes, lhs_values.options()); + + const uint32_t cols = static_cast( + lhs_values.numel() / std::max(1, lhs_nnz)); + + dispatch_sync_with_rethrow(stream->queue(), ^() { + @autoreleasepool { + auto pso = lib.getPipelineStateForFunc( + "fused_gather_mul_kernel_" + mps::scalarToMetalTypeString(lhs_values)); + auto enc = stream->commandEncoder(); + [enc setComputePipelineState:pso]; + + const uint32_t tew = pso.threadExecutionWidth; + uint32_t tgW = std::min(cols, tew); + MTLSize grid = MTLSizeMake(cols, 1, M); + MTLSize tgs = MTLSizeMake(tgW, 1, 1); + + mtl_setArgs(enc, + lhs_values, rhs_values, + lhs_match, rhs_match, + lhs_indices, out_indices, + out_values, + std::array{static_cast(ndim_i), static_cast(lhs_nnz)}, + std::array{M, cols}); + [enc dispatchThreads:grid threadsPerThreadgroup:tgs]; + } + }); + + if (r_.scalar_type() != commonDtype) { + out_values = out_values.to(r_.scalar_type()); + } + + alias_into_sparse(r_, out_indices, out_values); + r_._coalesced_(true); + return r_; +} -Tensor& add_out_dense_sparse_mps( +static Tensor& add_out_dense_sparse_mps( Tensor& out, const Tensor& dense, const SparseTensor& sparse, diff --git a/aten/src/ATen/native/sparse/mps/kernels/Mul.metal b/aten/src/ATen/native/sparse/mps/kernels/Mul.metal new file mode 100644 index 0000000000000..4a9caa393f94c --- /dev/null +++ b/aten/src/ATen/native/sparse/mps/kernels/Mul.metal @@ -0,0 +1,150 @@ +#include +#include +using namespace metal; + + +template +kernel void dense_sparse_mul_kernel( + device const T* dense [[buffer(0)]], + device const T* values [[buffer(1)]], + device T* out_values [[buffer(2)]], + device const long* indices [[buffer(3)]], + device const long* sizes [[buffer(4)]], + constant uint3& sparse_params [[buffer(5)]], + uint3 gid [[thread_position_in_grid]]) +{ + uint col = gid.x; + uint i = gid.z; + uint nnz = sparse_params.x; + uint ndim_i = sparse_params.y; + uint view_cols = sparse_params.z; + + long key = 0; + for (uint d = 0; d < ndim_i; ++d) { + long idx_d = indices[(ulong)d * (ulong)nnz + (ulong)i]; + const auto sz_d = sizes[d]; + key = key * sz_d + idx_d; + } + + ulong dense_idx = (ulong)key * (ulong)view_cols + (ulong)col; + ulong val_idx = (ulong)i * (ulong)view_cols + (ulong)col; + + const auto a = static_cast(values[val_idx]); + const auto b = static_cast(dense[dense_idx]); + out_values[val_idx] = static_cast(a * b); +} + +kernel void intersect_binary_search( + device const long* keysA [[buffer(0)]], + device const long* keysB [[buffer(1)]], + device long* outA_idx [[buffer(2)]], + device 
long* outB_idx [[buffer(3)]], + device atomic_uint* counter [[buffer(4)]], + constant uint& lenB [[buffer(5)]], + constant bool& A_is_lhs [[buffer(6)]], + uint3 tid_in_grid [[thread_position_in_grid]]) +{ + uint gid = tid_in_grid.x; + + long key = keysA[gid]; + + // lower_bound in B + uint lo = 0; + uint hi = lenB; + while (lo < hi) { + uint mid = (lo + hi) >> 1; + long v = keysB[mid]; + if (v < key) lo = mid + 1; + else hi = mid; + } + + if (lo < lenB && keysB[lo] == key) { + uint pos = atomic_fetch_add_explicit(counter, 1u, memory_order_relaxed); + if (A_is_lhs) { + outA_idx[pos] = (long)gid; + outB_idx[pos] = (long)lo; + } else { + outA_idx[pos] = (long)lo; + outB_idx[pos] = (long)gid; + } + } +} + + +template +kernel void fused_gather_mul_kernel( + device const T* lhs_vals [[buffer(0)]], + device const T* rhs_vals [[buffer(1)]], + device const long* lhs_sel [[buffer(2)]], + device const long* rhs_sel [[buffer(3)]], + device const long* lhs_indices [[buffer(4)]], + device long* out_indices [[buffer(5)]], + device T* out_vals [[buffer(6)]], + constant uint2& dims_input [[buffer(7)]], + constant uint2& dims_output [[buffer(8)]], + uint3 gid [[thread_position_in_grid]]) +{ + const uint col = gid.x; + const uint k = gid.z; + const uint n_dim_i = dims_input.x; + const uint L = dims_input.y; + const uint M = dims_output.x; + const uint view_cols = dims_output.y; + + const long iL = lhs_sel[k]; + const long iR = rhs_sel[k]; + + if (col < view_cols) { + const ulong offL = (ulong)iL * (ulong)view_cols + (ulong)col; + const ulong offR = (ulong)iR * (ulong)view_cols + (ulong)col; + const ulong offO = (ulong)k * (ulong)view_cols + (ulong)col; + + const float a = (float)lhs_vals[offL]; + const float b = (float)rhs_vals[offR]; + out_vals[offO] = (T)(a * b); + } + + // One thread per match copies the indices column + if (col == 0) { + const ulong uL = (ulong)L; + const ulong uM = (ulong)M; + const ulong src_col = (ulong)iL; // gather from lhs + for (uint d = 0; d < n_dim_i; ++d) { + const long v = lhs_indices[(ulong)d * uL + src_col]; + out_indices[(ulong)d * uM + (ulong)k] = v; + } + } +} + +#define INSTANTIATE_DENSE_SPARSE_MUL(DTYPE) \ + template [[host_name("dense_sparse_mul_kernel_" #DTYPE)]] kernel void \ + dense_sparse_mul_kernel( \ + device const DTYPE* dense [[buffer(0)]], \ + device const DTYPE* values [[buffer(1)]], \ + device DTYPE* out_values [[buffer(2)]], \ + device const long* indices [[buffer(3)]], \ + device const long* sizes [[buffer(4)]], \ + constant uint3& sparse_params [[buffer(5)]], \ + uint3 gid [[thread_position_in_grid]]); + +INSTANTIATE_DENSE_SPARSE_MUL(float); +INSTANTIATE_DENSE_SPARSE_MUL(half); +INSTANTIATE_DENSE_SPARSE_MUL(bfloat); + +#define INSTANTIATE_FUSED_GATHER_MUL(DTYPE) \ + template [[host_name("fused_gather_mul_kernel_" #DTYPE)]] kernel void \ + fused_gather_mul_kernel( \ + device const DTYPE* lhs_vals [[buffer(0)]], \ + device const DTYPE* rhs_vals [[buffer(1)]], \ + device const long* lhs_sel [[buffer(2)]], \ + device const long* rhs_sel [[buffer(3)]], \ + device const long* lhs_indices [[buffer(4)]], \ + device long* out_indices [[buffer(5)]], \ + device DTYPE* out_vals [[buffer(6)]], \ + constant uint2& dims_input [[buffer(7)]], \ + constant uint2& dims_output [[buffer(8)]], \ + uint3 gid [[thread_position_in_grid]]); + +INSTANTIATE_FUSED_GATHER_MUL(float); +INSTANTIATE_FUSED_GATHER_MUL(half); +INSTANTIATE_FUSED_GATHER_MUL(bfloat); \ No newline at end of file diff --git a/test/test_sparse.py b/test/test_sparse.py index 3568c6bc7d194..d01a51a6a0867 100644 --- 
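For readers following the Metal kernels above: the sparse*sparse path flattens each operand's indices into scalar keys (row-major over the sparse dims), runs a lower_bound binary search of one sorted key list against the other to find structural matches, and then gathers and multiplies the matching values. Below is a minimal CPU-side PyTorch sketch of that flow, for illustration only; it is not part of the patch, the helper names are made up, and it omits the kernel's optimization of searching the shorter key list in the longer one.

import torch

def sparse_sparse_mul_reference(lhs, rhs):
    # Coalesce first; the early exit mirrors the patch's handling of empty operands.
    lhs, rhs = lhs.coalesce(), rhs.coalesce()
    if lhs._nnz() == 0 or rhs._nnz() == 0:
        return torch.sparse_coo_tensor(lhs.indices()[:, :0], lhs.values()[:0], lhs.shape)
    sizes = lhs.shape[:lhs.sparse_dim()]

    def flat_keys(t):
        # Row-major flattening of the sparse indices; sorted because t is coalesced.
        keys = torch.zeros(t._nnz(), dtype=torch.long)
        for d in range(t.sparse_dim()):
            keys = keys * sizes[d] + t.indices()[d]
        return keys

    lhs_keys, rhs_keys = flat_keys(lhs), flat_keys(rhs)
    pos = torch.searchsorted(rhs_keys, lhs_keys)      # lower_bound for every lhs key
    pos = pos.clamp(max=rhs_keys.numel() - 1)
    hit = rhs_keys[pos] == lhs_keys                   # structural intersection
    lhs_sel = hit.nonzero(as_tuple=True)[0]
    rhs_sel = pos[lhs_sel]
    out_values = lhs.values()[lhs_sel] * rhs.values()[rhs_sel]
    return torch.sparse_coo_tensor(lhs.indices()[:, lhs_sel], out_values, lhs.shape)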
a/test/test_sparse.py +++ b/test/test_sparse.py @@ -1108,8 +1108,8 @@ def test_shape(sparse_dims, nnz, sizes): test_shape(2, 20, [3, 17, 19, 5]) test_shape(2, 20, [3, 17, 19, 0]) - @expectedFailureMPS @dtypes(torch.double, torch.cdouble) + @dtypesIfMPS(torch.float32, torch.complex64) def test_add_sub_nnz(self, device, dtype): # nnz should not grow unbounded (gh-34964) x = torch.randn(10, dtype=dtype, device=device).to_sparse() @@ -1687,24 +1687,24 @@ def fn(S, D): test_shape(7, 8, 9, 20, True) @coalescedonoff - @expectedFailureMPS @dtypes(torch.double) + @dtypesIfMPS(torch.float32) @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") @gradcheck_semantics() def test_sparse_mul(self, device, dtype, coalesced, gradcheck): # https://github.com/pytorch/pytorch/issues/79914 a = torch.tensor([[0., 1]], dtype=dtype, device=device).to_sparse().requires_grad_(True) b = torch.tensor([[0., 1]], dtype=dtype, device=device).to_sparse().requires_grad_(True) - gradcheck(lambda x, y: torch.sparse.sum(x * y).to_dense(masked_grad=gradcheck.masked), [a, b]) + gradcheck(lambda x, y: torch.sparse.sum(x * y).to_dense(masked_grad=gradcheck.masked), [a, b], eps=1e-4) def test_shape(sparse_dims, nnz, with_shape): a = self._gen_sparse(sparse_dims, nnz, with_shape, dtype, device, coalesced)[0].requires_grad_(True) b = self._gen_sparse(sparse_dims, nnz, with_shape, dtype, device, coalesced)[0].requires_grad_(True) self.assertEqual((a * b).to_dense(), a.to_dense() * b.to_dense(), masked=True) - gradcheck(lambda x, y: (x * y).to_dense(), [a, b]) + gradcheck(lambda x, y: (x * y).to_dense(), [a, b], eps=1e-4) # Issues with 0-dim indices/values - gradcheck(lambda x, y: torch.sparse.sum(x * y).to_dense(), [a, b], masked=True) + gradcheck(lambda x, y: torch.sparse.sum(x * y).to_dense(), [a, b], masked=True, eps=1e-4) # TODO: Re-enable these # test_shape(2, 3, [2, 3, 4, 5]) @@ -1933,7 +1933,6 @@ def fn(S): S = self._gen_sparse(sparse_dims, nnz, with_size, dtype, device, coalesced)[0] run_tests(S.requires_grad_(True), test_dim) - @expectedFailureMPS def _test_basic_ops_shape(self, nnz_x1, nnz_x2, shape_i, shape_v, dtype, device, coalesced): shape = shape_i + (shape_v) x1, _, _ = self._gen_sparse(len(shape_i), nnz_x1, shape, dtype, device, coalesced) @@ -2011,6 +2010,7 @@ def _test_basic_ops_shape(self, nnz_x1, nnz_x2, shape_i, shape_v, dtype, device, @coalescedonoff @dtypes(torch.double) + @dtypesIfMPS(torch.float32) def test_basic_ops(self, device, dtype, coalesced): def _test_basic_ops(): @@ -3328,8 +3328,8 @@ def test_isnan(self, device): self.assertEqual(torch.isnan(t).int(), t_nan.int()) @coalescedonoff - @expectedFailureMPS @dtypes(torch.float32, torch.float64) + @dtypesIfMPS(torch.float16, torch.float32) def test_div_rounding_mode(self, device, dtype, coalesced): sparse, _, _ = self._gen_sparse(2, 10, (10, 10), dtype, device, coalesced) @@ -3349,13 +3349,11 @@ def test_div_rounding_mode(self, device, dtype, coalesced): torch.div(sparse, -2, rounding_mode=mode, out=actual) self.assertEqual(self.safeToDense(actual), expect) - @expectedFailureMPS def test_div_by_sparse_error(self, device): self.assertRaisesRegex(RuntimeError, 'Sparse division requires', lambda: torch.tensor(1., device=device).to_sparse() / torch.tensor(1., device=device).to_sparse()) - @expectedFailureMPS def test_floor_divide_by_sparse_error(self, device): self.assertRaisesRegex(RuntimeError, 'Sparse floor division requires', lambda: torch.tensor(1., device=device).to_sparse() @@ -4136,8 +4134,8 @@ def 
test_small_nnz_coalesced(self): self.assertFalse(torch.sparse_coo_tensor([[0, 1], [0, 1]], [1, 2], (2, 2)).is_coalesced()) @coalescedonoff - @expectedFailureMPS @dtypes(*all_types_and_complex_and(torch.bool)) + @dtypesIfMPS(*all_mps_types()) def test_sum(self, device, dtype, coalesced): def run_test(shape, nnz): a = self._gen_sparse(2, nnz, shape, dtype, device, coalesced)[0] From 9cac1b92595ec7836101d51dbe1415081042c7a0 Mon Sep 17 00:00:00 2001 From: Arijit Mukhopadhyay Date: Thu, 11 Sep 2025 18:41:29 +0000 Subject: [PATCH 114/693] AMD CPU CI - Add freezing + fix label trigger (#162176) Added the following changes: 1. Added freezing by default for AMD CPU based CI 2. Fixed issue with label based CI triggers Addresses code review comment in https://github.com/pytorch/pytorch/pull/161155 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162176 Approved by: https://github.com/malfet, https://github.com/jeffdaily --- .../workflows/inductor-perf-test-nightly-x86-zen.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml index 170de752ab875..d6a1c95ad4007 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml @@ -43,6 +43,11 @@ on: required: false type: boolean default: false + freezing: + description: Run freezing? + required: false + type: boolean + default: true benchmark_configs: description: The list of configs used the benchmark required: false @@ -102,7 +107,7 @@ jobs: if: github.event.schedule == '0 7 * * *' with: build-environment: linux-jammy-py3.9-gcc11-build - dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true + dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} timeout-minutes: 720 @@ -116,10 +121,9 @@ jobs: name: inductor-test uses: ./.github/workflows/_linux-test.yml needs: inductor-build - if: github.event_name == 'workflow_dispatch' with: build-environment: linux-jammy-py3.9-gcc11-build - dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }} + dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} timeout-minutes: 720 From ac72f81c120c4bf8f0440974a26f4813e8dcfacd Mon Sep 17 00:00:00 2001 From: Pian Pawakapan Date: Thu, 11 Sep 2025 18:51:22 +0000 Subject: [PATCH 115/693] [dynamic shapes] unbacked-safe should_swap (#160473) Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/160473 Approved by: https://github.com/laithsakka --- test/dynamo/test_misc.py | 63 ++++++++++++++++++++++++++++++ torch/_inductor/decomposition.py | 2 +- torch/_meta_registrations.py | 2 +- torch/_prims/__init__.py | 2 +- torch/_prims_common/__init__.py | 66 +++++++++++++++++--------------- torch/_refs/__init__.py | 2 +- torch/_tensor.py | 15 ++++++-- 7 files changed, 114 insertions(+), 38 deletions(-) diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py 
index 0a85aac2bf318..3f08da0825468 100644 --- a/test/dynamo/test_misc.py +++ b/test/dynamo/test_misc.py @@ -9604,6 +9604,69 @@ def f(x, i): f(torch.randn(9, requires_grad=True), torch.tensor([3, 6])) + @torch._dynamo.config.patch(capture_scalar_outputs=True) + def test_dim_order(self): + @torch.compile(dynamic=False, fullgraph=True, backend="eager") + def f(x): + x = x.permute(3, 0, 2, 1) + return x, x.dim_order() + + @torch.compile(dynamic=False, fullgraph=True, backend="eager") + def g(x): + return x.dim_order() + + @torch.compile(dynamic=False, fullgraph=True, backend="eager") + def h0(xs, ambiguity_check=False): + u0, u1, u2 = xs.tolist() + torch._check(u2 >= u0) + torch._check(u1 >= u0) + # stride ordering still isn't unique here, should raise + y = torch.empty_strided([4, 4, 4], [u0, u1, u2]) + return y.dim_order(ambiguity_check=ambiguity_check) + + @torch.compile(dynamic=False, fullgraph=True, backend="eager") + def h1(xs, ambiguity_check=False): + u0, u1, u2 = xs.tolist() + y = torch.empty_strided([4, 4, 4], [u0, u0, u0]) # no ordering + return y.dim_order(ambiguity_check=ambiguity_check) + + # check that for functions permuting contiguous input, the original stride is recovered with dim_order. + def test(x): + stride_inp = tuple(x.stride()) + f_out, f_order = f(x) + self.assertEqual(stride_inp, tuple(f_out.stride(i) for i in f_order)) + + # shape: [4, u0, 5, u1] + x0 = torch.randn(4, 1, 5, 2) + torch._dynamo.decorators.mark_unbacked(x0, 1) + torch._dynamo.decorators.mark_unbacked(x0, 3) + test(x0) + + # shape: [u0, u1, u2, u3] + x1 = torch.randn(4, 1, 5, 2) + for i in range(x1.ndim): + torch._dynamo.decorators.mark_unbacked(x1, i) + test(x1) + + # custom strides (all integers) + x2 = torch.randn(10000) + x2 = x2.as_strided([4, 4, 4, 4], [1, 2, 4, 8]) + assert g(x2) == (3, 2, 1, 0) + + # custom unbacked strides with no ordering: ambiguity check should raise + xs = torch.tensor([2, 3, 4]) + h0(xs) + with self.assertRaisesRegex( + torch._dynamo.exc.TorchRuntimeError, + r"The tensor does not have unique dim order.", + ): + h0(xs, ambiguity_check=True) + with self.assertRaisesRegex( + torch._dynamo.exc.TorchRuntimeError, + r"The tensor does not have unique dim order.", + ): + h1(xs, ambiguity_check=True) + def test_str_format_assert1(self): @torch.compile(backend="eager", fullgraph=True) def fn(img): diff --git a/torch/_inductor/decomposition.py b/torch/_inductor/decomposition.py index eebe6c974e173..3c970b66cd9e5 100644 --- a/torch/_inductor/decomposition.py +++ b/torch/_inductor/decomposition.py @@ -579,7 +579,7 @@ def view_copy_dtype( def _get_shape_permutation_like( self: torch.Tensor, ) -> tuple[utils.ShapeType, utils.StrideType]: - physical_layout = utils.compute_elementwise_output_logical_to_physical_perm(self) + physical_layout, _ = utils.compute_elementwise_output_logical_to_physical_perm(self) shape = [self.shape[l] for l in physical_layout] permutation = [0] * len(shape) diff --git a/torch/_meta_registrations.py b/torch/_meta_registrations.py index 7a0301371b119..8b8c2c16b9c77 100644 --- a/torch/_meta_registrations.py +++ b/torch/_meta_registrations.py @@ -3465,7 +3465,7 @@ def _restride_src(self): # Note that perm here is the reverse of the 'perm_' decided by # TensorIteratorBase::reorder_dimensions restrided_self = _restride_src(self) - perm = utils.compute_elementwise_output_logical_to_physical_perm(restrided_self) + perm, _ = utils.compute_elementwise_output_logical_to_physical_perm(restrided_self) # Follow TensorIteratorBase::allocate_or_resize_outputs if list(perm) != 
list(range(len(perm))): diff --git a/torch/_prims/__init__.py b/torch/_prims/__init__.py index 34f77a2aed2e8..5fef517dc59fd 100644 --- a/torch/_prims/__init__.py +++ b/torch/_prims/__init__.py @@ -404,7 +404,7 @@ def _prim_elementwise_meta( utils.check_same_device(*args_, allow_cpu_scalar_tensors=True) utils.check_same_shape(*args_, allow_cpu_scalar_tensors=True) - l2p_perm = utils.compute_elementwise_output_logical_to_physical_perm(*args_) + l2p_perm, _ = utils.compute_elementwise_output_logical_to_physical_perm(*args_) shape = utils.extract_shape(*args_, allow_cpu_scalar_tensors=True) # Acquires the dtype diff --git a/torch/_prims_common/__init__.py b/torch/_prims_common/__init__.py index 91b0cc1f68d47..67238f05b23df 100644 --- a/torch/_prims_common/__init__.py +++ b/torch/_prims_common/__init__.py @@ -534,12 +534,9 @@ def is_non_overlapping_and_dense(a: Tensor) -> bool: # This is also INCORRECT because it does not model TensorIterator's # short-circuit, which can cause different strides. def compute_elementwise_output_logical_to_physical_perm( - *tensors, _skip_checks=False -) -> list[int]: - from torch.fx.experimental.symbolic_shapes import ( - guard_or_false, - guard_size_oblivious, - ) + *tensors, _skip_checks=False, ambiguity_check=False +) -> tuple[list[int], bool]: + from torch.fx.experimental.symbolic_shapes import guard_or_false if not _skip_checks and len(tensors) == 0: msg = "Can't compute elementwise output strides for zero tensors!" @@ -558,15 +555,15 @@ def compute_elementwise_output_logical_to_physical_perm( # Short-circuits for CPU scalar case if len(tensors) == 0: - return [] + return [], False # Short-circuits for shapes with zero or one dimensions # TODO: are these necessary? ndim = tensors[0].ndim if ndim == 0: - return [] + return [], False if ndim == 1: - return [0] + return [0], False # Short-circuits if contiguous or channels last, following the fake fast path. # This reduces the number of guards we end up making @@ -584,42 +581,40 @@ def compute_elementwise_output_logical_to_physical_perm( ) if is_contiguous and not is_channels_last: - return list(range(ndim)) + return list(range(ndim)), False if is_channels_last and not is_contiguous: - return [0, *list(range(2, ndim)), 1] + return [0, *list(range(2, ndim)), 1], False shape = tensors[0].shape def should_swap(idx_a, idx_b): + def ge(a, b): + """ + Returns true if a is symbolically greater than or equal to b, assuming a >= 0, b >= 0. 
+ """ + if guard_or_false(b == 0): + return True + elif guard_or_false(a == 0): + return False + return guard_or_false(a >= b) or guard_or_false(a % b == 0) + for tensor in tensors: stride_a = tensor.stride()[idx_a] stride_b = tensor.stride()[idx_b] - if guard_size_oblivious(stride_a == 0) or guard_size_oblivious( - stride_b == 0 - ): + + if guard_or_false(stride_a == 0) or guard_or_false(stride_b == 0): continue if guard_or_false(stride_a == stride_b): - if guard_size_oblivious(shape[idx_a] > shape[idx_b]): - return 1 - - # when stride_a = 1, we want stride_a < stride_b to be TRUE - # when stride_b = 1, we want stride_a < stride_b to be FALSE - elif guard_or_false(stride_a == 1): - return -1 - - elif guard_or_false(stride_b == 1): + if ge(shape[idx_b], shape[idx_a]): + continue return 1 - if guard_size_oblivious(stride_a < stride_b): + if ge(stride_b, stride_a): return -1 - if guard_size_oblivious(stride_a > stride_b): - return 1 - - # stride_a == stride_b - if guard_size_oblivious(shape[idx_a] > shape[idx_b]): + if ge(stride_a, stride_b): return 1 # Note: this case is hit if all strides are zero, @@ -644,7 +639,16 @@ def should_swap(idx_a, idx_b): elif comparison < 0: break - return list(reversed(perm)) + # verify we've imposed ordering if ambiguity_check=True + raise_ambiguous = False + if ambiguity_check: + for i, j in zip(range(ndim - 1), range(1, ndim)): + order = should_swap(perm[i], perm[j]) + if order != -1: + raise_ambiguous = True + break + + return list(reversed(perm)), raise_ambiguous def compute_elementwise_output_strides(*tensors) -> tuple[int, ...]: @@ -674,7 +678,7 @@ def compute_elementwise_output_strides(*tensors) -> tuple[int, ...]: if ndim == 1: return (1,) - logical_to_physical_perm = compute_elementwise_output_logical_to_physical_perm( + logical_to_physical_perm, _ = compute_elementwise_output_logical_to_physical_perm( *tensors, _skip_checks=True ) permuted_shape = apply_perm(shape, logical_to_physical_perm) # to physical diff --git a/torch/_refs/__init__.py b/torch/_refs/__init__.py index 8a418f349de6b..18455b5194150 100644 --- a/torch/_refs/__init__.py +++ b/torch/_refs/__init__.py @@ -5111,7 +5111,7 @@ def empty_like( ) # memory_format == torch.preserve_format - logical_to_physical_perm = ( + logical_to_physical_perm, _ = ( utils.compute_elementwise_output_logical_to_physical_perm(a) ) # identity perm is [2, 1, 0] diff --git a/torch/_tensor.py b/torch/_tensor.py index 6cebed28b8b0d..bb6c2b0466843 100644 --- a/torch/_tensor.py +++ b/torch/_tensor.py @@ -1585,17 +1585,19 @@ def has_multiple_dim_order(tensor): If any two dimensions have the same stride, swapping these dimensions won't change how data is accessed, leading to multiple correct dimension orders. 
""" + from torch.fx.experimental.symbolic_shapes import guard_or_false sizes = tensor.size() strides = tensor.stride() # Check if there are any duplicate strides has_duplicate_strides = any( - earlier == later for earlier, later in zip(strides, strides[1:]) + guard_or_false(earlier == later) + for earlier, later in zip(strides, strides[1:]) ) # Check if there are any singleton dimensions - has_singleton_dims = any(size == 1 for size in sizes) + has_singleton_dims = any(guard_or_false(size == 1) for size in sizes) return has_duplicate_strides or has_singleton_dims @@ -1615,7 +1617,14 @@ def has_multiple_dim_order(tensor): import torch._prims_common as utils - return tuple(utils.compute_elementwise_output_logical_to_physical_perm(self)) + out_perm, raise_ambiguity = ( + utils.compute_elementwise_output_logical_to_physical_perm( + self, ambiguity_check=ambiguity_check + ) + ) + if raise_ambiguity: + raise RuntimeError("The tensor does not have unique dim order.") + return tuple(out_perm) def _update_names(self, names, inplace): if has_torch_function_unary(self): From d65ffdef3d39173637fcf2f2557abed341d0e3df Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Thu, 11 Sep 2025 19:37:48 +0000 Subject: [PATCH 116/693] [ROCm] fix miopen batchnorm changing output format (#162112) It was found that the integration of miopen batchnorm was causing the output to always be in default contig memory format even when the input was channels last. This also unskips a number of related unit tests. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162112 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily Co-authored-by: Dmitry Nikolaev Co-authored-by: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> --- aten/src/ATen/native/Normalization.cpp | 4 +- .../ATen/native/miopen/BatchNorm_miopen.cpp | 34 ++++++++--------- test/functorch/test_ops.py | 14 ------- test/nn/test_convolution.py | 29 +++++--------- test/test_nn.py | 38 +++++++++++++------ tools/autograd/derivatives.yaml | 2 +- 6 files changed, 56 insertions(+), 65 deletions(-) diff --git a/aten/src/ATen/native/Normalization.cpp b/aten/src/ATen/native/Normalization.cpp index 7327bf2d7e30b..13b421d1e6888 100644 --- a/aten/src/ATen/native/Normalization.cpp +++ b/aten/src/ATen/native/Normalization.cpp @@ -624,7 +624,9 @@ std::tuple _batch_norm_impl_index( if (backend == BatchNormBackend::Miopen) { return std::tuple_cat( at::miopen_batch_norm( - input.contiguous(), weight.contiguous(), bias.contiguous(), + input.contiguous(input.suggest_memory_format()), + weight.contiguous(), + bias.contiguous(), running_mean.defined() ? running_mean.contiguous() : running_mean, running_var.defined() ? 
running_var.contiguous() : running_var, training, momentum, eps), diff --git a/aten/src/ATen/native/miopen/BatchNorm_miopen.cpp b/aten/src/ATen/native/miopen/BatchNorm_miopen.cpp index af69dfc76e571..0c122c9e13d4d 100644 --- a/aten/src/ATen/native/miopen/BatchNorm_miopen.cpp +++ b/aten/src/ATen/native/miopen/BatchNorm_miopen.cpp @@ -7,6 +7,7 @@ #include #else #include +#include #include #include #endif @@ -102,7 +103,7 @@ std::tuple miopen_batch_norm( mode = miopenBNSpatial; } - auto output_t = at::empty(input->sizes(), input->options()); + auto output_t = at::empty_like(input_t, input_t.options(), input_t.suggest_memory_format()); TensorArg output{ output_t, "output", 0 }; auto handle = getMiopenHandle(); @@ -170,20 +171,15 @@ std::tuple miopen_batch_norm_backward( const std::optional& save_var_t_opt, double epsilon) { // See [Note: hacky wrapper removal for optional tensor] - const Tensor& running_mean = - running_mean_opt.value_or(Tensor()); - const Tensor& running_var = - running_var_opt.value_or(Tensor()); - const Tensor& save_mean_t = - save_mean_t_opt.value_or(Tensor()); - const Tensor& save_var_t = - save_var_t_opt.value_or(Tensor()); - - TensorArg input{ input_t, "input", 1 }, - grad_output{ grad_output_t, "grad_output", 2 }, - weight{ weight_t, "weight", 3 }, - save_mean{ save_mean_t, "save_mean", 4 }, - save_var{ save_var_t, "save_var", 5 }; + const Tensor& save_mean_t = save_mean_t_opt.value_or(Tensor()); + const Tensor& save_var_t = save_var_t_opt.value_or(Tensor()); + + auto grad_output_contig = + grad_output_t.contiguous(input_t.suggest_memory_format()); + TensorArg input{input_t, "input", 1}, + grad_output{grad_output_contig, "grad_output", 2}, + weight{weight_t, "weight", 3}, save_mean{save_mean_t, "save_mean", 4}, + save_var{save_var_t, "save_var", 5}; CheckedFrom c = "miopen_batch_norm_backward"; checkAllDefined(c, {input, grad_output, weight, save_mean, save_var}); @@ -195,7 +191,11 @@ std::tuple miopen_batch_norm_backward( } checkAllSameType(c, {input, grad_output}); checkAllSameType(c, {weight, save_mean, save_var}); - checkAllContiguous(c, {input, grad_output, save_mean, save_var}); + // TODO: is weight required to be contiguous? 
+ checkAllContiguous(c, {save_mean, save_var}); + // TODO: TensorArg check should start handle memory format + TORCH_CHECK(input->is_contiguous(input->suggest_memory_format())); + TORCH_CHECK(grad_output->is_contiguous(input->suggest_memory_format())); checkDimRange(c, input, 2, 6 /* exclusive */); checkSameSize(c, input, grad_output); auto num_features = input->size(1); @@ -210,7 +210,7 @@ std::tuple miopen_batch_norm_backward( mode = miopenBNSpatial; } - auto grad_input_t = at::empty(input->sizes(), input->options()); + auto grad_input_t = at::empty(input->sizes(), input->options(), input->suggest_memory_format()); auto grad_weight_t = at::empty(weight->sizes(), weight->options()); auto grad_bias_t = at::empty(weight->sizes(), weight->options()); diff --git a/test/functorch/test_ops.py b/test/functorch/test_ops.py index 78e64278cb1e2..a2c88f7c35a13 100644 --- a/test/functorch/test_ops.py +++ b/test/functorch/test_ops.py @@ -468,13 +468,6 @@ class TestOperators(TestCase): ), # Works on ROCm xfail("torch.ops.aten._flash_attention_forward"), xfail("torch.ops.aten._efficient_attention_forward"), - # RuntimeError: Expected contiguous tensor, but got - # non-contiguous tensor for argument #2 'grad_output' - decorate( - "_batch_norm_with_update", - decorator=expectedFailureIf(TEST_WITH_ROCM), - device_type="cuda", - ), } ), ) @@ -2400,13 +2393,6 @@ def fn(input, weight, bias): skip("sparse.sampled_addmm", ""), skip("sparse.mm", "reduce"), skip("native_layer_norm", "", device_type="cpu"), - # RuntimeError: Expected contiguous tensor, but got - # non-contiguous tensor for argument #2 'grad_output' - decorate( - "_batch_norm_with_update", - decorator=expectedFailureIf(TEST_WITH_ROCM), - device_type="cuda", - ), }, ) @opsToleranceOverride( diff --git a/test/nn/test_convolution.py b/test/nn/test_convolution.py index abc3cffe3d4df..81a9cf1ae5bea 100644 --- a/test/nn/test_convolution.py +++ b/test/nn/test_convolution.py @@ -30,7 +30,6 @@ skipCUDAIfMiopen, skipCUDAIfNoCudnn, skipCUDAIfNoMiopen, - skipCUDAIfNotMiopenSuggestNHWC, skipCUDAIfRocm, skipMeta, skipMPS, @@ -51,8 +50,6 @@ parametrize as parametrize_test, run_tests, set_default_dtype, - skipIfNotMiopenSuggestNHWC, - skipIfRocmVersionLessThan, subtest, TEST_SCIPY, TEST_WITH_ROCM, @@ -64,6 +61,7 @@ if TEST_WITH_ROCM: os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC"] = "1" + os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC_BATCHNORM"] = "1" if TEST_SCIPY: @@ -715,7 +713,6 @@ def test_ConvTranspose2d_half_cublas_gemm(self): # Almost identical to the above `test_Conv2d_naive_groups` @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False) @tf32_on_and_off(0.001) - @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7") def test_Conv2d_groups_nobias(self): dev_dtypes = [("cpu", torch.float)] if TEST_CUDA: @@ -761,7 +758,6 @@ def test_Conv2d_groups_nobias(self): # and https://github.com/pytorch/pytorch/pull/18463#issuecomment-477001024 @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False) @tf32_on_and_off(0.001) - @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7") def test_Conv2d_groups_nobias_v2(self): torch.manual_seed(123) dev_dtypes = [("cpu", torch.float)] @@ -896,7 +892,6 @@ def test_conv_tbc(self): @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") @unittest.skipIf(not TEST_CUDNN, "needs cudnn") - @skipIfNotMiopenSuggestNHWC def test_grouped_conv_cudnn_nhwc_support(self): # in order to catch the hols in grouped convolution in nhwc support for earlier 
cudnn version input = torch.randn((16, 16, 8, 8), dtype=torch.float16, device="cuda").to( @@ -3146,7 +3141,6 @@ def test_conv_noncontig_weights_and_bias(self, device): @onlyCUDA @largeTensorTest("12GB") - @skipIfRocmVersionLessThan((6, 0)) def test_conv_transposed_large(self, device): dtype = torch.half if self.device_type == "cuda" else torch.float conv = nn.ConvTranspose2d(1, 1, 1, 1, bias=False).to(device).to(dtype) @@ -3190,7 +3184,6 @@ def test_conv_transposed_large(self, device): self.assertEqual(maxdiff3, 0) @onlyCUDA - @skipCUDAIfRocm @largeTensorTest("12GB") def test_conv_large(self, device): dtype = torch.half if self.device_type == "cuda" else torch.float @@ -3223,7 +3216,6 @@ def test_conv_large(self, device): self.assertEqual(grad1, grad2, atol=5e-2, rtol=5e-3) @onlyCUDA - @skipCUDAIfRocm @largeTensorTest("20GB", "cpu") @largeTensorTest("60GB", "cuda") def test_conv_large_batch_1(self, device): @@ -3360,7 +3352,6 @@ def test_ConvTranspose3d_size_1_kernel(self, device): @dtypes(torch.float) @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False) @tf32_on_and_off(0.001) - @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7") def test_Conv2d_naive_groups(self, device, dtype): # Check that grouped convolutions matches two half convolutions m = nn.Conv2d(4, 4, kernel_size=3, groups=2).to(device, dtype) @@ -3629,19 +3620,21 @@ def helper( ) @onlyCUDA - @skipCUDAIfNotMiopenSuggestNHWC @dtypes(torch.half, torch.float, torch.cfloat) def test_conv_cudnn_nhwc(self, device, dtype): def helper(n, c, h, w, out_channels, kernel_size, groups): - input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device).to( - memory_format=torch.channels_last - ) + # randint with dtype=torch.cfloat fails with + # RuntimeError: check_random_bounds handles only integral, floating-point and boolean types + # must create randint and randint_like using default int64, then cast to desired + input = torch.randint( + -3, 3, (n, c, h, w), dtype=torch.int64, device=device + ).to(dtype, memory_format=torch.channels_last) input.requires_grad_() conv = nn.Conv2d(c, out_channels, kernel_size, groups=groups).to( device="cuda", dtype=dtype, memory_format=torch.channels_last ) for p in conv.parameters(): - p.data = torch.randint_like(p, -3, 3) + p.data = torch.randint_like(p, -3, 3, dtype=torch.int64).to(p.dtype) # use FP64 channels-first conv as reference ref_input = input.detach().clone().contiguous().double().requires_grad_() @@ -3655,7 +3648,7 @@ def helper(n, c, h, w, out_channels, kernel_size, groups): out = conv(input) ref_out = ref_conv(ref_input) - grad = torch.randint_like(out, -3, 3) + grad = torch.randint_like(out, -3, 3, dtype=torch.int64).to(out.dtype) ref_grad = grad.detach().clone().double().contiguous() out.backward(grad) @@ -3682,7 +3675,6 @@ def helper(n, c, h, w, out_channels, kernel_size, groups): helper(1, 16, 56, 56, out_channels=16, kernel_size=3, groups=16) @onlyCUDA - @skipCUDAIfRocm @dtypes(torch.half, torch.float) def test_conv_cudnn_ndhwc(self, device, dtype): def helper(n, c, d, h, w, out_channels, kernel_size, groups): @@ -3812,7 +3804,6 @@ def _test_conv_cudnn_nhwc_nchw(self, layer, n, c, h, w, k, filter_size, device): ) @onlyCUDA - @skipCUDAIfNotMiopenSuggestNHWC @tf32_on_and_off(0.05) def test_conv_cudnn_mismatch_memory_format(self, device): configs = [ @@ -3945,7 +3936,6 @@ def test_cudnn_convolution_add_relu(self, device, dtype): self.assertEqual(F.relu(conv2d_out + alpha * z), cudnn_out) @onlyCUDA - @skipCUDAIfRocm def 
test_convert_conv2d_weight_memory_format(self, device): input = torch.randint(1, 10, (2, 8, 4, 4), dtype=torch.float32, device=device) model = nn.Sequential(nn.Conv2d(8, 4, 3), nn.BatchNorm2d(4)).to(device).float() @@ -3965,7 +3955,6 @@ def test_convert_conv2d_weight_memory_format(self, device): self.assertTrue(out.is_contiguous(memory_format=memory_format)) @onlyCUDA - @skipCUDAIfRocm def test_convert_conv3d_weight_memory_format(self, device): input = torch.randint( 1, 10, (2, 8, 4, 4, 4), dtype=torch.float32, device=device diff --git a/test/test_nn.py b/test/test_nn.py index 13ee5c2e2a420..33d5763c64b76 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -62,6 +62,7 @@ if TEST_WITH_ROCM: os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC"] = "1" + os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC_BATCHNORM"] = "1" # load_tests from common_utils is used to automatically filter tests for # sharding on sandcastle. This line silences flake warnings @@ -3514,7 +3515,6 @@ def test_cudnn_forward_exception(self): self.assertRaisesRegex(RuntimeError, re.escape("input.size(-1) must be equal to input_size"), rnn, x_wrong) @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available') - @skipIfRocm def test_cudnn_weight_format(self): rnns = [ nn.LSTM(10, 20, batch_first=True), @@ -3522,7 +3522,8 @@ def test_cudnn_weight_format(self): nn.GRU(10, 20, batch_first=True), nn.RNN(10, 20, batch_first=True) ] - first_warn = True + # ROCm RNN does not issue warning about single contig chunk of memory, so don't assert it + first_warn = False if torch.version.hip else True for rnn in rnns: rnn.cuda() input = torch.randn(5, 4, 10, requires_grad=True, device="cuda") @@ -5171,24 +5172,38 @@ def test_batchnorm_buffer_update_when_stats_are_not_tracked(self): ("NCHW", "native", False, torch.float), ("NCHW", "native", True, torch.half), ("NCHW", "native", True, torch.bfloat16), + + ("NHWC", "cpu", False, torch.float), + ("NHWC", "cpu", True, torch.half), + ("NHWC", "cpu", True, torch.bfloat16), + + ("NHWC", "native", False, torch.float), + ("NHWC", "native", True, torch.half), + ("NHWC", "native", True, torch.bfloat16), + + ("NHWC", "NCHW", False, torch.float), + ("NHWC", "NCHW", True, torch.half), + ("NHWC", "NCHW", True, torch.bfloat16), ], name_fn=lambda f, b, m, t: f"{f}_vs_{b}{'_mixed' if m else ''}_{dtype_name(t)}" ) def test_batchnorm(self, dims, mode, memory_format, ref_backend, mixed, dtype): if torch.version.cuda: if self._testMethodName in ("test_batchnorm_2D_train_NCHW_vs_cpu_mixed_bfloat16", - "test_batchnorm_3D_train_NCHW_vs_cpu_mixed_bfloat16"): - self.skipTest("bfloat16 NHWC train failed on CUDA due to native tolerance issue " - "https://github.com/pytorch/pytorch/issues/156513") - if self._testMethodName == "test_batchnorm_3D_train_NCHW_vs_native_mixed_float16": - self.skipTest("Batchnorm 3D NHWC train failed on CUDA") + "test_batchnorm_3D_train_NCHW_vs_cpu_mixed_bfloat16", + "test_batchnorm_2D_train_NHWC_vs_NCHW_mixed_bfloat16", + "test_batchnorm_3D_train_NHWC_vs_NCHW_mixed_bfloat16", + "test_batchnorm_3D_train_NCHW_vs_native_mixed_float16"): + self.skipTest("Failed on CUDA") if torch.version.hip: if self._testMethodName in ("test_batchnorm_2D_train_NCHW_vs_cpu_mixed_bfloat16", - "test_batchnorm_3D_train_NCHW_vs_cpu_mixed_bfloat16") \ + "test_batchnorm_3D_train_NCHW_vs_cpu_mixed_bfloat16", + "test_batchnorm_2D_train_NHWC_vs_NCHW_mixed_bfloat16", + "test_batchnorm_3D_train_NHWC_vs_NCHW_mixed_bfloat16") \ and _get_torch_rocm_version() < (6, 4): # NCHW bfloat16 path uses native kernels for rocm<=6.3 - # train failed on 
rocm<=6.3 due to native tolerance issue + # train failed on rocm<=6.3 due to native accuracy issue # https://github.com/pytorch/pytorch/issues/156513 self.skipTest("bfloat16 NHWC train failed on ROCm <= 6.3") @@ -5198,9 +5213,8 @@ def test_batchnorm(self, dims, mode, memory_format, ref_backend, mixed, dtype): # https://github.com/pytorch/pytorch/issues/156513 self.skipTest("bfloat16 NCHW train failed due to native tolerance issue") - if self._testMethodName == "test_batchnorm_3D_train_NCHW_vs_native_mixed_float16" \ - and _get_torch_rocm_version() < (7, 0): - self.skipTest("3D float16 NCHW train failed on ROCm<7.0") + if self._testMethodName == "test_batchnorm_3D_train_NCHW_vs_native_mixed_float16": + self.skipTest("3D float16 NCHW train failed on ROCm") if dims == 3 and memory_format in ("NHWC", "NCHW"): memory_format = memory_format + "3D" diff --git a/tools/autograd/derivatives.yaml b/tools/autograd/derivatives.yaml index c050c6cbdc4c3..506d829b5712c 100644 --- a/tools/autograd/derivatives.yaml +++ b/tools/autograd/derivatives.yaml @@ -2801,7 +2801,7 @@ self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, false, std::vector(padding.size(), 0), groups, grad_input_mask) : std::tuple()" - name: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor) - input, weight, bias: "grad.defined() ? (training ? miopen_batch_norm_backward(input, grad.contiguous(), weight, running_mean, running_var, result1, result2, epsilon) : native_batch_norm_backward(grad, input, weight, running_mean, running_var, result1, result2, training, epsilon, grad_input_mask)) : std::tuple()" + input, weight, bias: "grad.defined() ? (training ? miopen_batch_norm_backward(input, grad.contiguous(input.suggest_memory_format()), weight, running_mean, running_var, result1, result2, epsilon) : native_batch_norm_backward(grad, input, weight, running_mean, running_var, result1, result2, training, epsilon, grad_input_mask)) : std::tuple()" result0: batch_norm_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, running_mean, running_var, result1, result2, training, epsilon) - name: miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? 
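A minimal sketch of the behavior this batch-norm change addresses, for illustration only: it assumes a ROCm build that dispatches to MIOpen batch norm and, per the test changes above, PYTORCH_MIOPEN_SUGGEST_NHWC_BATCHNORM=1 set in the environment.

import torch

bn = torch.nn.BatchNorm2d(8).cuda()
x = torch.randn(2, 8, 4, 4, device="cuda").to(memory_format=torch.channels_last)
y = bn(x)
# Before this patch the MIOpen path returned a default-contiguous (NCHW) output
# even for channels-last input; with the fix the output should follow the
# input's suggested memory format.
assert y.is_contiguous(memory_format=torch.channels_last)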
save_var, float epsilon) -> (Tensor, Tensor, Tensor) From b500c166ef62cff584a9c910d46eb68715fdb4c9 Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Thu, 11 Sep 2025 19:51:19 +0000 Subject: [PATCH 117/693] [FlexAttention][Easy] turn off TMA when cannot use it (#162569) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162569 Approved by: https://github.com/drisspg --- torch/_inductor/kernel/flex/flex_attention.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/torch/_inductor/kernel/flex/flex_attention.py b/torch/_inductor/kernel/flex/flex_attention.py index 52144b03cf4d2..fb986e93c78cf 100644 --- a/torch/_inductor/kernel/flex/flex_attention.py +++ b/torch/_inductor/kernel/flex/flex_attention.py @@ -10,7 +10,6 @@ import sympy import torch -from torch._inductor.utils import can_use_tma from torch._inductor.virtualized import V from ...ir import ComputedBuffer, ExternKernel, FixedLayout, TensorBox @@ -317,9 +316,6 @@ def flex_attention( # USE TMA = false by default cur_kernel_options.setdefault("USE_TMA", False) - if cur_kernel_options["USE_TMA"] and can_use_tma(query, key, value): - cur_kernel_options["USE_TMA"] = True - cur_kernel_options.setdefault("BLOCK_M", conf.block_m) cur_kernel_options.setdefault("BLOCK_N", conf.block_n) # Blocksparse options From cef05b1202bb26929917aeaabae94de430fdb8fe Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 11 Sep 2025 20:24:15 +0000 Subject: [PATCH 118/693] Revert "[inductor][choices] rename get_mm_configs to get_template_configs (#162293)" This reverts commit 30191fcf03ddd6a09381a490096c4bb721874316. Reverted https://github.com/pytorch/pytorch/pull/162293 on behalf of https://github.com/huydhn due to Check with @coconutruben and the internal failures look real ([comment](https://github.com/pytorch/pytorch/pull/161351#issuecomment-3282511692)) --- torch/_inductor/choices.py | 10 +++++----- torch/_inductor/kernel/bmm.py | 6 ++---- torch/_inductor/kernel/mm.py | 18 ++++++------------ torch/_inductor/kernel/mm_plus_mm.py | 2 +- 4 files changed, 14 insertions(+), 22 deletions(-) diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index bb7fe6d46d9d1..fa70c6a53358e 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -106,7 +106,7 @@ def get_flex_decode_configs( flex_heuristics = self.get_config_heuristics(device_type) return flex_heuristics.get_flex_decode_configs(head_dim, dtype) - def _finalize_template_configs( + def _finalize_mm_configs( self, template_choices: dict[str, Generator[KernelTemplateChoice, None, None]], kernel_inputs: KernelInputs, @@ -148,12 +148,12 @@ def get_ktc( """ Utility to get the KernelTemplateChoice generator for a specific input. - This is a per template/op call, whereas get_template_configs is an op wide call (all templates). + This is a per template/op call, whereas get_mm_configs is an op wide call (all templates). 
Consider when overriding/using at which level you need to make decisions """ # Extract device_type from kernel_inputs device_type = kernel_inputs.device_type - assert device_type is not None, "get_ktc requires a valid device type" + assert device_type is not None, "get_mm_configs requires a valid device type" # Extract template_name from the template object template_name = template.uid @@ -221,7 +221,7 @@ def _need_to_fix_layout( not isinstance(ktc.template, ExternKernelChoice) for ktc in adjusted_choices ) - def get_template_configs( + def get_mm_configs( self, kernel_inputs: KernelInputs, templates: list[Union[KernelTemplate, ExternKernelChoice]], @@ -258,7 +258,7 @@ def get_template_configs( ) # Second pass: Adjust the template choices - adjusted_choices = self._finalize_template_configs( + adjusted_choices = self._finalize_mm_configs( template_choices, kernel_inputs, templates, diff --git a/torch/_inductor/kernel/bmm.py b/torch/_inductor/kernel/bmm.py index 20d101b951c09..734ab96810743 100644 --- a/torch/_inductor/kernel/bmm.py +++ b/torch/_inductor/kernel/bmm.py @@ -214,7 +214,7 @@ def may_require_contiguous(t, meta_t): # Single unified call for all templates choices.extend( - V.choices.get_template_configs( + V.choices.get_mm_configs( kernel_inputs, templates_to_use, name, @@ -290,8 +290,6 @@ def tuned_baddbmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): templates_to_use.append(bmm_template) # Single unified call for all templates - choices.extend( - V.choices.get_template_configs(kernel_inputs, templates_to_use, name) - ) + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 24c5c23218ba6..fc20286887069 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -770,9 +770,7 @@ def tuned_mm(mat1, mat2, *, layout=None): templates_to_use.append(mm_contiguous_subgraph_template) # Single unified call for all non-autoheuristic templates - choices.extend( - V.choices.get_template_configs(kernel_inputs, templates_to_use, "mm") - ) + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm")) if ( is_nonzero @@ -807,7 +805,7 @@ def tuned_mm(mat1, mat2, *, layout=None): always_included.append("extern_mm") num_choices_before_extra_configs = len(choices) choices.extend( - V.choices.get_template_configs( + V.choices.get_mm_configs( # TODO(coconutruben): remove once we deprecate ah # mm-extra is a hack to keep the ah functionality alive # while we transition to the unified kwargs retrieval @@ -900,9 +898,7 @@ def tuned_int_mm(mat1, mat2, *, layout=None): templates_to_use.append(mm_template) # Single unified call for all templates - choices.extend( - V.choices.get_template_configs(kernel_inputs, templates_to_use, name) - ) + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) if use_cutlass and _use_cutlass_for_op(name): CUTLASS3xGemmTemplate.add_cutlass_gemm_choices( @@ -948,7 +944,7 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): [inp, mat1, mat2], scalars=dict(alpha=alpha, beta=beta) ) choices.extend( - V.choices.get_template_configs( + V.choices.get_mm_configs( kernel_inputs, [aten_addmm], name, @@ -970,9 +966,7 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): templates_to_use.append(addmm_contiguous_subgraph_template) # Single unified call for all templates - choices.extend( - 
V.choices.get_template_configs(kernel_inputs, templates_to_use, name) - ) + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) if ( is_nonzero @@ -1159,7 +1153,7 @@ def tuned_scaled_mm( # Single unified call for all templates choices.extend( - V.choices.get_template_configs( + V.choices.get_mm_configs( kernel_inputs, templates_to_use, name, diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index df94e3e5cd7bb..20140378477a2 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -167,7 +167,7 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None): # Single unified call for all templates choices.extend( - V.choices.get_template_configs(kernel_inputs, templates_to_use, "mm_plus_mm") + V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm_plus_mm") ) return autotune_select_algorithm( From 934f87888380ff828c03139c0435ebe27a18d76d Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 11 Sep 2025 20:24:15 +0000 Subject: [PATCH 119/693] Revert "[inductor] leverage template stacking in V.choices.get_mm_configs (#161350)" This reverts commit 623e623c821f639559248e9acd6084311c8fd3d5. Reverted https://github.com/pytorch/pytorch/pull/161350 on behalf of https://github.com/huydhn due to Check with @coconutruben and the internal failures look real ([comment](https://github.com/pytorch/pytorch/pull/161351#issuecomment-3282511692)) --- torch/_inductor/choices.py | 4 +- torch/_inductor/kernel/bmm.py | 47 +++---- torch/_inductor/kernel/mm.py | 175 ++++++++++++++++----------- torch/_inductor/kernel/mm_plus_mm.py | 20 ++- 4 files changed, 136 insertions(+), 110 deletions(-) diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index fa70c6a53358e..3db131e2584eb 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -210,7 +210,9 @@ def _need_to_fix_layout( # Since the following backends are not using get_template_configs yet through the singular call, # we don't know if they are a valid choice or not. Instead, just skip the optimization # defensively. 
- # TODO(coconutruben): remove this once CPP,CK,CUTLASS are supported + # TODO(coconutruben): remove this once TRITON,CPP,CK,CUTLASS are supported + if _use_autotune_backend("TRITON"): + return True if _use_autotune_backend("CUTLASS"): return True if _use_autotune_backend("CK") or _use_autotune_backend("CKTILE"): diff --git a/torch/_inductor/kernel/bmm.py b/torch/_inductor/kernel/bmm.py index 734ab96810743..e9867212767eb 100644 --- a/torch/_inductor/kernel/bmm.py +++ b/torch/_inductor/kernel/bmm.py @@ -1,6 +1,6 @@ # mypy: allow-untyped-defs import logging -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING import torch from torch._dynamo.utils import counters @@ -28,7 +28,6 @@ if TYPE_CHECKING: from ..ir import ChoiceCaller - from ..select_algorithm import KernelTemplate log = logging.getLogger(__name__) aten = torch.ops.aten @@ -198,29 +197,21 @@ def may_require_contiguous(t, meta_t): aten_extra_kwargs = {"out_dtype": out_dtype} choices: list[ChoiceCaller] = [] - - # Collect all templates for unified call - templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] - kwarg_overrides = {} - if use_aten_gemm_kernels(): - templates_to_use.append(aten_handler) - kwarg_overrides[aten_handler.uid] = aten_extra_kwargs + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [aten_handler], + name, + kwarg_overrides={aten_handler.uid: aten_extra_kwargs}, + ) + ) if use_triton_template(layout, check_max_autotune=False): # TODO: add out_dtype support for Triton Template assert out_dtype is None, "out_dtype is not supported for Triton" - templates_to_use.append(bmm_template) - - # Single unified call for all templates - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - templates_to_use, - name, - kwarg_overrides=kwarg_overrides, - ) - ) + + choices.extend(V.choices.get_mm_configs(kernel_inputs, [bmm_template], name)) _, is_nonzero = _is_static_problem(layout) batch_stride_largest_or_zero = is_batch_stride_largest_or_zero(mat1, mat2, layout) if ( @@ -280,16 +271,16 @@ def tuned_baddbmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): name = "baddbmm" # options to tune from choices: list[ChoiceCaller] = [] - - # Collect all templates for unified call - templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - templates_to_use.append(aten_baddbmm) + choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_baddbmm], name)) if use_triton_template(layout, check_max_autotune=False): - templates_to_use.append(bmm_template) - - # Single unified call for all templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [bmm_template], + name, + ) + ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index fc20286887069..73239596a2903 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import functools import logging -from typing import Any, Optional, Union +from typing import Any, Optional import torch from torch._dynamo.utils import counters @@ -29,7 +29,6 @@ from ..select_algorithm import ( autotune_select_algorithm, ExternKernelChoice, - KernelTemplate, realize_inputs, TritonTemplate, ) @@ -751,26 +750,32 @@ def tuned_mm(mat1, mat2, *, layout=None): ) choices: list[ChoiceCaller] = [] - static_shape, is_nonzero = _is_static_problem(layout) - - 
# Collect all templates for unified call - templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - templates_to_use.append(aten_mm) - - if is_nonzero and use_triton_template(layout, check_max_autotune=True): - templates_to_use.append(mm_template) + choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm")) + static_shape, is_nonzero = _is_static_problem(layout) + if is_nonzero and use_triton_template(layout, check_max_autotune=False): + # Get template choices using the new unified function + choices.extend(V.choices.get_mm_configs(kernel_inputs, [mm_template], "mm")) if use_triton_tma_template(mat1, mat2): - templates_to_use.append(persistent_tma_mm_template) + # Get TMA template choices using the new unified function + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, [persistent_tma_mm_template], "mm" + ) + ) if use_decompose_k_choice(m, n, k): - templates_to_use.append(decompose_k_subgraph_template) - - templates_to_use.append(mm_contiguous_subgraph_template) - - # Single unified call for all non-autoheuristic templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm")) + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, [decompose_k_subgraph_template], "mm" + ) + ) + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, [mm_contiguous_subgraph_template], "mm" + ) + ) if ( is_nonzero @@ -886,25 +891,25 @@ def tuned_int_mm(mat1, mat2, *, layout=None): # Create MMKernelInputs for Int MM kernel_inputs = MMKernelInputs([mat1, mat2], out_dtype=torch.int32) - - # Collect all templates for unified call - templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - templates_to_use.append(aten__int_mm) - - if is_nonzero and use_triton_template( - layout, enable_int32=True, check_max_autotune=False - ): - templates_to_use.append(mm_template) - - # Single unified call for all templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [aten__int_mm], + name, + ) + ) if use_cutlass and _use_cutlass_for_op(name): CUTLASS3xGemmTemplate.add_cutlass_gemm_choices( choices, layout, kernel_inputs.nodes(), fuseable=True, non_fuseable=True ) + if is_nonzero and use_triton_template( + layout, enable_int32=True, check_max_autotune=False + ): + choices.extend(V.choices.get_mm_configs(kernel_inputs, [mm_template], name)) + return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) @@ -952,21 +957,50 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) - # Collect all templates for unified call - templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - templates_to_use.extend([aten_bias_addmm, aten_addmm]) + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [aten_bias_addmm], + name, + ) + ) + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [aten_addmm], + name, + ) + ) if is_nonzero and use_triton_template(layout, check_max_autotune=False): - templates_to_use.append(mm_template) + # all the triton templates use the extra_kwargs + # Get template choices using the new unified function + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [mm_template], + name, + ) + ) if use_triton_tma_template(mat1, mat2): - templates_to_use.append(persistent_tma_mm_template) - - 
templates_to_use.append(addmm_contiguous_subgraph_template) + # Get TMA template choices using the new unified function + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [persistent_tma_mm_template], + name, + ) + ) - # Single unified call for all templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [addmm_contiguous_subgraph_template], + "addmm", + ) + ) if ( is_nonzero @@ -1121,49 +1155,52 @@ def tuned_scaled_mm( ) choices: list[ChoiceCaller] = [] - - # Collect all templates for unified call - templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] - kwarg_overrides = {} - if use_aten_gemm_kernels(): - templates_to_use.append(aten__fp8_mm) - kwarg_overrides[aten__fp8_mm.uid] = dict( - out_dtype=out_dtype, use_fast_accum=use_fast_accum + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [aten__fp8_mm], + name, + kwarg_overrides={ + aten__fp8_mm.uid: dict( + out_dtype=out_dtype, use_fast_accum=use_fast_accum + ) + }, + ) ) + # We dont have triton lowerings for the MX variants yet + if scale_a.dtype != torch.float32: + return autotune_select_algorithm(name, choices, input_nodes, layout) + _, is_nonzero = _is_static_problem(layout) - if ( - # We dont have triton lowerings for the MX variants yet - scale_a.dtype == torch.float32 - and is_nonzero - and use_triton_template(layout, enable_float8=True, check_max_autotune=False) + if is_nonzero and use_triton_template( + layout, enable_float8=True, check_max_autotune=False ): overriders = dict(USE_FAST_ACCUM=use_fast_accum) - # TODO (paulzhan): There is no template that exists for bias and TMA # Don't run tma template currently if bias exists if use_triton_tma_template(mat_a, mat_b) and not bias: - templates_to_use.append(scaled_mm_device_tma_template) - kwarg_overrides[scaled_mm_device_tma_template.uid] = overriders - - templates_to_use.append(mm_template) - kwarg_overrides[mm_template.uid] = overriders + # Get TMA template choices using the new unified function + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [scaled_mm_device_tma_template], + name, + kwarg_overrides={scaled_mm_device_tma_template.uid: overriders}, + ) + ) - # Single unified call for all templates - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - templates_to_use, - name, - kwarg_overrides=kwarg_overrides, + # Get template choices using the new unified function + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + [mm_template], + name, + kwarg_overrides={mm_template.uid: overriders}, + ) ) - ) - - # Early return for MX variants - if scale_a.dtype != torch.float32: - return autotune_select_algorithm(name, choices, input_nodes, layout) if ( is_nonzero diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index 20140378477a2..c27056e5a3227 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import logging -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING import torch @@ -19,7 +19,6 @@ if TYPE_CHECKING: from torch._inductor.ir import ChoiceCaller - from torch._inductor.select_algorithm import KernelTemplate log = logging.getLogger(__name__) @@ -156,19 +155,16 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None): assert layout1 == layout2 # options to tune from choices: list[ChoiceCaller] = [] - - # Collect all templates for unified call - 
templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - templates_to_use.append(aten_mm_plus_mm) + choices.extend( + V.choices.get_mm_configs(kernel_inputs, [aten_mm_plus_mm], "mm_plus_mm") + ) if use_triton_template(layout1, check_max_autotune=False): - templates_to_use.append(mm_plus_mm_template) - - # Single unified call for all templates - choices.extend( - V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm_plus_mm") - ) + # Get template choices using the new unified function + choices.extend( + V.choices.get_mm_configs(kernel_inputs, [mm_plus_mm_template], "mm_plus_mm") + ) return autotune_select_algorithm( "mm_plus_mm", choices, kernel_inputs.nodes(), layout1 From 1c6dfbe5578cdd4e49c67e0c777c36aa9295d7b1 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 11 Sep 2025 20:24:15 +0000 Subject: [PATCH 120/693] Revert "[inductor] FlexibleLayout for ExternKernelChoice for mms (#161351)" This reverts commit f08487aa8692751c36e608e338204490b0955583. Reverted https://github.com/pytorch/pytorch/pull/161351 on behalf of https://github.com/huydhn due to Check with @coconutruben and the internal failures look real ([comment](https://github.com/pytorch/pytorch/pull/161351#issuecomment-3282511692)) --- test/inductor/test_max_autotune.py | 38 +------------ torch/_inductor/choices.py | 86 ++++++++---------------------- torch/_inductor/kernel/mm.py | 21 +++++++- 3 files changed, 43 insertions(+), 102 deletions(-) diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index aea205163d808..320bdf3462e64 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -27,7 +27,7 @@ TuningProcessPool, ) from torch._inductor.graph import GraphLowering -from torch._inductor.ir import Buffer, ChoiceCaller, FixedLayout, FlexibleLayout +from torch._inductor.ir import Buffer, ChoiceCaller, FixedLayout from torch._inductor.kernel.mm_plus_mm import aten_mm_plus_mm from torch._inductor.select_algorithm import ( add_feedback_saver, @@ -1973,42 +1973,6 @@ def choice_validator(choices): finally: clear_preprocessing_fns() - @config.patch( - {"test_configs.max_mm_configs": 4, "max_autotune_gemm_backends": "ATEN,TRITON"} - ) - @parametrize("max_autotune_enabled", (True, False)) - def test_autotune_layout_optimization(self, max_autotune_enabled): - """Test that layouts are flexible when every choice is ExternKernelChoice""" - - # we use a proxy here of bias_addmm and max-autotune because this enables us to see - # multiple choices in both scenarios (bias_addmm, addmm, triton (max-autotune only)) - # and both bias_addmm and addmm are extern kernel choices - def layout_checker(choices): - if choices: - expected_layout = ( - FixedLayout if max_autotune_enabled else FlexibleLayout - ) - for choice in choices: - self.assertIsInstance( - choice.layout, - expected_layout, - f"Expected {expected_layout.__name__} with max_autotune={max_autotune_enabled}", - ) - return choices - - add_preprocessing_fn(layout_checker) - - try: - bias = torch.randn(64, device=GPU_TYPE) - x = torch.randn(32, 128, device=GPU_TYPE) - w = torch.randn(128, 64, device=GPU_TYPE) - - with config.patch({"max_autotune": max_autotune_enabled}): - compiled_fn = torch.compile(lambda b, x, w: torch.addmm(b, x, w)) - _ = compiled_fn(bias, x, w) - finally: - clear_preprocessing_fns(clear_defaults=False) - class TestMaxAutotunePrecompile(TestCase): def test_precompilation_threads(self): diff --git a/torch/_inductor/choices.py 
b/torch/_inductor/choices.py index 3db131e2584eb..a6275ac85c110 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -14,7 +14,6 @@ from .metrics import get_metric_table, is_metric_table_enabled from .runtime.hints import DeviceProperties, ReductionHint from .scheduler import BaseSchedulerNode, Scheduler, WhyNoFuse -from .select_algorithm import ExternKernelChoice from .template_heuristics import get_template_heuristic from .template_heuristics.triton import ( BaseConfigHeuristic, @@ -24,7 +23,6 @@ ROCmConfigHeuristic, XPUConfigHeuristic, ) -from .utils import _use_autotune_backend from .virtualized import V @@ -34,13 +32,14 @@ from triton import Config as TritonConfig + from torch.utils._ordered_set import OrderedSet + from .codegen.common import KernelTemplate from .codegen.simd_kernel_features import SIMDKernelFeatures from .codegen.triton import TritonKernel - from .ir import ChoiceCaller + from .ir import ChoiceCaller, Layout from .kernel_template_choice import KernelTemplateChoice - - from torch.utils._ordered_set import OrderedSet # isort: skip + from .select_algorithm import ExternKernelChoice class Sortable(typing.Protocol): @@ -110,6 +109,7 @@ def _finalize_mm_configs( self, template_choices: dict[str, Generator[KernelTemplateChoice, None, None]], kernel_inputs: KernelInputs, + layout: Any, templates: list[Union[KernelTemplate, ExternKernelChoice]], op_name: str, kwarg_overrides: Optional[dict[str, dict[str, Any]]] = None, @@ -126,6 +126,7 @@ def _finalize_mm_configs( Args: template_choices: Dictionary mapping template UIDs to generators of KernelTemplateChoice objects kernel_inputs: MMKernelInputs containing input tensor nodes and matrix indices + layout: Output layout templates: List of template objects (KernelTemplate or ExternKernelChoice) in use op_name: Operation name (e.g., "bmm", "baddbmm", "addmm") kwarg_overrides: Optional dict of kwargs to override for each template heuristic @@ -141,6 +142,7 @@ def _finalize_mm_configs( def get_ktc( self, kernel_inputs: KernelInputs, + layout: Layout, template: Union[KernelTemplate, ExternKernelChoice], op_name: str, kwarg_overrides: Optional[dict[str, Any]] = None, @@ -174,60 +176,16 @@ def get_ktc( cs=cs, overrides=overrides, extra_kwargs=extra_kwargs, - layout=kernel_inputs.output_layout(), + layout=layout, inputs=inputs_val, ) - def _need_to_fix_layout( - self, - adjusted_choices: list[KernelTemplateChoice], - op_name: str, - ) -> bool: - """ - Check if we need to fix the layout instead of keeping it flexible - - Args: - ktc: KernelTemplateChoice object - - Returns: - True if we need to fix the layout, False otherwise - """ - # TODO: debug and fix - # NOTE: on mps, we see issues with flexible layouts on baddmm. This check just makes sure - # that for mps, everything stays as it was before this optimization - if len(adjusted_choices) > 0: - if adjusted_choices[0].inputs.device_type == "mps" and op_name not in [ - "mm", - "addmm", - ]: - return True - - # Since the following backends are not using get_mm_configs yet through the singular call, - if not (config.max_autotune or config.max_autotune_gemm): - # no danger of using other backends than ATEN - return False - - # Since the following backends are not using get_template_configs yet through the singular call, - # we don't know if they are a valid choice or not. Instead, just skip the optimization - # defensively. 
- # TODO(coconutruben): remove this once TRITON,CPP,CK,CUTLASS are supported - if _use_autotune_backend("TRITON"): - return True - if _use_autotune_backend("CUTLASS"): - return True - if _use_autotune_backend("CK") or _use_autotune_backend("CKTILE"): - return True - if _use_autotune_backend("CPP"): - return True - return any( - not isinstance(ktc.template, ExternKernelChoice) for ktc in adjusted_choices - ) - def get_mm_configs( self, kernel_inputs: KernelInputs, templates: list[Union[KernelTemplate, ExternKernelChoice]], op_name: str, + layout: Optional[Layout] = None, kwarg_overrides: Optional[dict[str, dict[str, Any]]] = None, ) -> list[ChoiceCaller]: """ @@ -248,12 +206,17 @@ def get_mm_configs( input_tensors = kernel_inputs.nodes() if len(input_tensors) < 2: raise ValueError(f"Need at least 2 input tensors, got {len(input_tensors)}") - layout = kernel_inputs.output_layout() + if layout is None: + # TODO(coconutruben): remove this once we remove the layout argument entirely + # This is just here to the brief gap between commits where we still need this + # to accommodate fixed vs flexible layout decision externally + layout = kernel_inputs.output_layout(flexible=False) # First pass: Create dict of template.uid to generator of KernelTemplateChoice objects template_choices = {} for template in templates: template_choices[template.uid] = self.get_ktc( kernel_inputs, + layout, template, op_name, kwarg_overrides.get(template.uid, {}), @@ -263,21 +226,18 @@ def get_mm_configs( adjusted_choices = self._finalize_mm_configs( template_choices, kernel_inputs, + layout, templates, op_name, kwarg_overrides, ) - # Layout optimization: if all choices are ExternKernelChoice and layout is FixedLayout, convert to FlexibleLayout - if self._need_to_fix_layout(adjusted_choices, op_name): - layout = kernel_inputs.output_layout(flexible=False) - for ktc in adjusted_choices: - ktc.layout = layout - # for good measure, delete the cached ChoiceCaller from the ktc if it existed. 
- # ExternKernelChoice are cheap to generate - if hasattr(ktc, "_choice"): - del ktc._choice - # Third pass: Convert to ChoiceCaller objects - return [ktc.choice for ktc in adjusted_choices if ktc.choice is not None] + choices = [] + # Third pass: Get adjusted choices and collect non-None ChoiceCaller objects + for ktc in adjusted_choices: + if ktc.choice is not None: + choices.append(ktc.choice) + + return choices def triton_kernel_kwargs( self, diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 73239596a2903..30510042dd1d8 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -23,7 +23,7 @@ from ..codegen.rocm.ck_tile_universal_gemm_template import CKTileGemmTemplate from ..codegen.rocm.ck_universal_gemm_template import CKGemmTemplate from ..codegen.subgraph import SubgraphChoiceCaller, SubgraphTemplate -from ..ir import Buffer, ChoiceCaller, is_triton, Layout +from ..ir import Buffer, ChoiceCaller, FlexibleLayout, is_triton, Layout from ..kernel_inputs import MMKernelInputs from ..lowering import add_layout_constraint, constrain_to_fx_strides, register_lowering from ..select_algorithm import ( @@ -749,9 +749,16 @@ def tuned_mm(mat1, mat2, *, layout=None): layout, ) + aten_layout = layout + if not (inductor_config.max_autotune or inductor_config.max_autotune_gemm): + aten_layout = FlexibleLayout( + device=layout.device, dtype=layout.dtype, size=layout.size + ) choices: list[ChoiceCaller] = [] if use_aten_gemm_kernels(): - choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm")) + choices.extend( + V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm", aten_layout) + ) static_shape, is_nonzero = _is_static_problem(layout) if is_nonzero and use_triton_template(layout, check_max_autotune=False): @@ -939,9 +946,18 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): mat2.get_dtype(), layout, ) + aten_layout = layout if (not is_nonzero) or ( not (inductor_config.max_autotune or inductor_config.max_autotune_gemm) ): + # Use a FlexibleLayout if we are not autotuning. + # This allows padding strides for the output. 
+ from torch._inductor.ir import FixedLayout, FlexibleLayout + + if isinstance(layout, FixedLayout): + aten_layout = FlexibleLayout( + device=layout.device, dtype=layout.dtype, size=layout.size + ) # TODO(coconutruben): combine this with the main flow of addmm through # a subgraph or something as inp vs inp_expanded causes some slight numeric # differences @@ -953,6 +969,7 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): kernel_inputs, [aten_addmm], name, + aten_layout, ) ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) From 3ad3bfe11df7bf542885a6dd2dc2ada4ab940e53 Mon Sep 17 00:00:00 2001 From: Rohit Manav Date: Thu, 11 Sep 2025 20:25:23 +0000 Subject: [PATCH 121/693] added example for torch.is_storage (#162614) Fixes #162613 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162614 Approved by: https://github.com/malfet Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com> --- torch/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/torch/__init__.py b/torch/__init__.py index 7969b6edc787b..eac57306e63d2 100644 --- a/torch/__init__.py +++ b/torch/__init__.py @@ -1129,6 +1129,14 @@ def is_storage(obj: _Any, /) -> _TypeIs[_Union["TypedStorage", "UntypedStorage"] Args: obj (Object): Object to test + Example:: + + >>> x = torch.tensor([1, 2, 3]) + >>> torch.is_storage(x) + False + >>> torch.is_storage(x.untyped_storage()) + True + """ return type(obj) in _storage_classes From 4c6a6c2db9d276b0503ea917f108ce0357e4e814 Mon Sep 17 00:00:00 2001 From: Janani Sriram Date: Thu, 11 Sep 2025 21:21:02 +0000 Subject: [PATCH 122/693] [Inductor][FP8] Add new scaled_mm and scaled_persistent_mm configs to Inductor FP8 Triton templates (#162699) Summary: Add new `scaled_mm` and `scaled_persistent_mm` configs to `template_heuristics.py` for Inductor FP8 Triton templates. These configs are a representative subset of the most performant configs generated from exhaustively autotuning FP8 Triton kernels with per-tensor and per-row scaling. See this [spreadsheet](https://docs.google.com/spreadsheets/d/1Fal1vhFUJIUcLpM2kJect6IkgeUFvCY-nUr3RTupM_4/edit?gid=1732602731#gid=1732602731) for benchmarks and performance metrics. Test Plan: Verify that configs do not error, i.e. 
``` CUDA_VISIBLE_DEVICES=0 TRITON_PRINT_AUTOTUNING=1 TRITON_ALWAYS_COMPILE=1 TORCH_LOGS=+i nductor TORCHINDUCTOR_FORCE_DISABLE_CACHES=1 ENABLE_PERSISTENT_TMA_MATMUL=1 TORCHINDUCTOR_MAX_AUTOTUNE_GEMM=1 buck2 run mode/{opt,inplace} pytorch/tritonbench:run -- --op fp8_gemm --only pt2_fp8_gemm --metrics tflops,accuracy --input-loader={input_path} --output="{output_csv}" --atol=1e-2 --rtol=0.5 2>&1 | tee {log_file} ``` Rollback Plan: Reviewed By: NikhilAPatel, PaulZhang12 Differential Revision: D81651226 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162699 Approved by: https://github.com/PaulZhang12 --- torch/_inductor/template_heuristics/triton.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/torch/_inductor/template_heuristics/triton.py b/torch/_inductor/template_heuristics/triton.py index f2756a5ee515a..f4c836247dff6 100644 --- a/torch/_inductor/template_heuristics/triton.py +++ b/torch/_inductor/template_heuristics/triton.py @@ -357,6 +357,10 @@ def __init__(self) -> None: GemmConfig(32, 64, 64, 6, 2), GemmConfig(32, 128, 64, 6, 4), GemmConfig(32, 256, 64, 6, 4), + GemmConfig(64, 16, 256, 5, 4), + GemmConfig(64, 32, 256, 5, 4), + GemmConfig(64, 128, 128, 3, 4), + GemmConfig(128, 256, 128, 4, 8), ] self.scaled_persistent_mm_configs: list[BaseConfig] = [ @@ -369,6 +373,10 @@ def __init__(self) -> None: GemmConfig(128, 128, 128, 5, 8), GemmConfig(128, 128, 128, 6, 8), GemmConfig(128, 128, 64, 4, 8), + GemmConfig(64, 32, 256, 5, 4), + GemmConfig(128, 256, 128, 3, 8), + GemmConfig(64, 128, 256, 4, 4), + GemmConfig(64, 256, 128, 4, 4), ] # TODO: Unify with other gemm patterns, mm_plus_mm currently follows From 9614c2eb14cb80a3e499f5b50b242ec7a50fa468 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Thu, 11 Sep 2025 21:22:32 +0000 Subject: [PATCH 123/693] [Triton] [Inductor] Pruned failed compilations from Autotuning candidates (#162673) Summary: When exahaustively autotuning a new template you may hit situations that lead to compilation failures. This template will still attempt to autotune because nothing was marking this as failed and in my experiments lead to a crash/segfault if I didn't set `TORCHINDUCTOR_AUTOTUNE_IN_SUBPROC=1`. To help eliminate this issue this PR marks any template that fails to compile as "failed" and then removes all of the failed templates from the choice candidates. In the case where it would have just failed to compile twice, this should at least reduce compilation time. Test Plan: Tested locally when experminenting with the new blackwell templates and a Triton version that contains a bug related to `num_warps < 4`. 
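For reference, the intent of the change as a simplified sketch (pseudocode following the names added in the diff below; the real `select_algorithm.py` path drives precompilation through futures, so treat this as an illustration only, not the actual Inductor control flow):
```
# Hypothetical, simplified flow -- not the real select_algorithm code path.
for choice in choices:
    try:
        choice.precompile()        # compilation may fail for some template configs
    except Exception:
        choice.mark_failed()       # new: remember the failure instead of ignoring it
# Prune failed candidates so they are never benchmarked.
choices = [c for c in choices if not c.failed]
if not choices:
    raise NoValidChoicesError("all autotuning candidates failed to compile")
```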
Rollback Plan: Differential Revision: D82172207 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162673 Approved by: https://github.com/PaulZhang12, https://github.com/mlazos --- torch/_inductor/ir.py | 9 +++++++++ torch/_inductor/select_algorithm.py | 13 +++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index eb490dd1c6e59..a380cd930fc9d 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -4972,6 +4972,7 @@ def __init__( # An additional description used to describe the choice (useful for # knowing what autotuning is choosing) self.description = description + self.failed: bool = False def benchmark(self, *args: Any, out: torch.Tensor) -> float: algo = self.to_callable() @@ -5009,6 +5010,14 @@ def info_dict(self) -> dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType def autoheuristic_id(self) -> str: return "unsupported_choice" + def mark_failed(self) -> None: + """ + Mark the choice as failed so that it can be + removed later. Useful for when we decouple + compilation and tuning. + """ + self.failed = True + class TritonTemplateCallerBase(ChoiceCaller): def get_make_kernel_render(self) -> Any: diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index eff89a21223c7..b76eb8cf9f1a4 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -2421,16 +2421,19 @@ def __call__( N = input_nodes[-1].get_size()[-1] append_to_log(mm_file_name, {"invoke": str((M, K, N))}) - if len(choices) == 0: + def create_no_valid_choices() -> NoValidChoicesError: backend_config = ( "max_autotune_gemm_backends" if name != "convolution" else "max_autotune_conv_backends" ) - raise NoValidChoicesError( + return NoValidChoicesError( f"No choices to select, please consider adding ATEN into {backend_config} " "config (defined in torch/_inductor/config.py) to allow at least one choice. " ) + + if len(choices) == 0: + raise create_no_valid_choices() log.debug("Max autotune selects from %s choices.", str(len(choices))) if len(choices) == 1: @@ -2487,6 +2490,10 @@ def do_autotuning(choices, precompile_fn, hint_override: Optional[int] = None): precompile_fn() precompile_elapse = time.time() - precompile_start_ts log.debug("Precompilation elapsed time: %.02fs", precompile_elapse) + # Prune anything that failed to compile + choices = [c for c in choices if not c.failed] + if len(choices) == 0: + raise create_no_valid_choices() candidates = self.prescreen_choices( choices, name, inputs_key, self.prescreening_cache @@ -2823,6 +2830,7 @@ def wait_on_futures(): futures[future], exc_info=e, ) + futures[future].mark_failed() else: log.exception( # noqa: G202 "Exception %s for benchmark choice %s", @@ -2830,6 +2838,7 @@ def wait_on_futures(): futures[future], exc_info=e, ) + futures[future].mark_failed() else: counters["inductor"]["select_algorithm_num_precompiles"] += 1 log.info( From 468c1f9e9d9259afeca0248c5e8f4b0e7fb113bf Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 11 Sep 2025 21:22:55 +0000 Subject: [PATCH 124/693] Revert "[nn] Assert parsed iterable arguments are an appropriate length (#162340)" This reverts commit b5e6e58050bd2a15f4173cfffa00c7e32e382b49. 
Reverted https://github.com/pytorch/pytorch/pull/162340 on behalf of https://github.com/huydhn due to Sorry for reverting your change but it seems to break an MPS tests on ExecuTorch ([comment](https://github.com/pytorch/pytorch/pull/162340#issuecomment-3282676242)) --- test/nn/test_pooling.py | 2 +- test/quantization/core/test_quantized_op.py | 20 +++++++++----------- test/test_mps.py | 6 +++--- test/test_nn.py | 10 ++++++++-- torch/nn/modules/conv.py | 2 +- torch/nn/modules/utils.py | 15 ++------------- 6 files changed, 24 insertions(+), 31 deletions(-) diff --git a/test/nn/test_pooling.py b/test/nn/test_pooling.py index 2e85f2da22683..a8f77df22d311 100644 --- a/test/nn/test_pooling.py +++ b/test/nn/test_pooling.py @@ -481,7 +481,7 @@ def test_max_unpool(self): def test_max_unpool3d_input_check(self): x = torch.ones(1, 3, 1, 1, 1) - with self.assertRaises(AssertionError): + with self.assertRaises(RuntimeError): F.max_unpool3d(x, torch.zeros(x.shape, dtype=int), [1, 1]) def test_quantized_max_pool1d_empty_kernel(self): diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py index 6b362bef365e6..b6df2089e87e7 100644 --- a/test/quantization/core/test_quantized_op.py +++ b/test/quantization/core/test_quantized_op.py @@ -15,7 +15,7 @@ from torch import _VF import torch.jit import torch.nn.functional as F -from torch.nn.modules.utils import _ntuple, _pair, _single +from torch.nn.modules.utils import _single, _pair from hypothesis import settings, HealthCheck from hypothesis import assume, given, note @@ -5311,11 +5311,10 @@ def _make_qconv_tensors( input_channels = input_channels_per_group * groups output_channels = output_channels_per_group * groups # Padded input size should be at least as big as dilated kernel - input_dimension_function = _ntuple(len(input_feature_map_shape)) - kernels = input_dimension_function(kernels) - strides = input_dimension_function(strides) - pads = input_dimension_function(pads) - dilations = input_dimension_function(dilations) + kernels = _single(kernels) + strides = _single(strides) + pads = _single(pads) + dilations = _single(dilations) for i in range(len(kernels)): assume(input_feature_map_shape[i] + 2 * pads[i] >= dilations[i] * (kernels[i] - 1) + 1) @@ -7847,11 +7846,10 @@ def _make_qconv_tensors_fp8( input_channels = input_channels_per_group * groups output_channels = output_channels_per_group * groups # Padded input size should be at least as big as dilated kernel - input_dimension_function = _ntuple(len(input_feature_map_shape)) - kernels = input_dimension_function(kernels) - strides = input_dimension_function(strides) - pads = input_dimension_function(pads) - dilations = input_dimension_function(dilations) + kernels = _single(kernels) + strides = _single(strides) + pads = _single(pads) + dilations = _single(dilations) for i in range(len(kernels)): assume(input_feature_map_shape[i] + 2 * pads[i] >= dilations[i] * (kernels[i] - 1) + 1) diff --git a/test/test_mps.py b/test/test_mps.py index c172c8c119b2b..756b2cd20567a 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ -8957,9 +8957,9 @@ def helper(shape, padding, op, value=0): # pad dims == input dims helper((1, 3), (0, 2, 0, 1), nn.ConstantPad2d) # input.numel() == 0 but output.numel() > 0 - helper((0, 3, 3), 1, nn.ConstantPad2d) + helper((0, 3, 3), (1, 1, 1, 1, 1, 1), nn.ConstantPad2d) # pad dims < input dims - 2 - helper((1, 2, 3, 4, 5), (1, 2, 0, 0), nn.ConstantPad2d) + helper((1, 2, 3, 4), (1, 2), nn.ConstantPad2d) # 3D Padding helper((2, 4, 6, 8, 
4), (1, 3, 3, 5, 3, 4), nn.ReflectionPad3d) @@ -8972,7 +8972,7 @@ def helper(shape, padding, op, value=0): # input size < pad size helper((2, 4, 6), (1, 3, 3, 5, 3, 4), nn.ConstantPad3d) # check the workaround for the right padding bug in Monterey - helper((1, 2, 2, 2, 2), (0, 1, 0, 1, 0, 1), nn.ConstantPad3d) + helper((1, 2, 2, 2, 2), (0, 1), nn.ConstantPad3d) def test_constant_pad_nd_preserves_memory_format(self): nchw_tensor = torch.rand((1, 2, 5, 3)) diff --git a/test/test_nn.py b/test/test_nn.py index 33d5763c64b76..dc01fbe077267 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -7480,8 +7480,14 @@ def test_padding_list(self): def test_fractional_max_pool2d_invalid_output_ratio(self): arg_1 = [2, 1] arg_2 = [0.5, 0.5, 0.6] - with self.assertRaisesRegex(AssertionError, "Expected an iterable of length 2, but got length 3"): - arg_class = torch.nn.FractionalMaxPool2d(kernel_size=arg_1, output_ratio=arg_2,) + arg_class = torch.nn.FractionalMaxPool2d(kernel_size=arg_1, output_ratio=arg_2,) + arg_3_0_tensor = torch.rand([20, 16, 50, 32], dtype=torch.float32) + arg_3_0 = arg_3_0_tensor.clone() + arg_3 = [arg_3_0,] + + with self.assertRaisesRegex(ValueError, + "fractional_max_pool2d requires output_ratio to either be a single Int or tuple of Ints."): + res = arg_class(*arg_3) def test_max_pool1d_invalid_output_size(self): arg_1 = 3 diff --git a/torch/nn/modules/conv.py b/torch/nn/modules/conv.py index ffb6f21e67145..2f15c3d488f72 100644 --- a/torch/nn/modules/conv.py +++ b/torch/nn/modules/conv.py @@ -768,7 +768,7 @@ def _output_padding( dilation: Optional[list[int]] = None, ) -> list[int]: if output_size is None: - ret = list(self.output_padding) # converting to list if was not already + ret = _single(self.output_padding) # converting to list if was not already else: has_batch_dim = input.dim() == num_spatial_dims + 2 num_non_spatial_dims = 2 if has_batch_dim else 1 diff --git a/torch/nn/modules/utils.py b/torch/nn/modules/utils.py index 492556dab01e6..220b8f206b195 100644 --- a/torch/nn/modules/utils.py +++ b/torch/nn/modules/utils.py @@ -1,5 +1,5 @@ # mypy: allow-untyped-defs -import collections.abc +import collections from itertools import repeat from typing import Any @@ -10,18 +10,7 @@ def _ntuple(n, name="parse"): def parse(x): if isinstance(x, collections.abc.Iterable): - ret = tuple(x) - - # If the iterable is length 1, automatically expand to fill. This - # matches the behavior of expand_param_if_needed. - if len(ret) == 1: - return tuple(repeat(ret[0], n)) - - # Otherwise assert the correct length. - assert len(ret) == n, ( - f"Expected an iterable of length {n}, but got length {len(ret)}" - ) - return ret + return tuple(x) return tuple(repeat(x, n)) parse.__name__ = name From 082d3dd9d53a60deb022e203892f0c492cf2cce7 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Thu, 11 Sep 2025 22:17:57 +0000 Subject: [PATCH 125/693] [Triton] [Inductor] Restrict subprocess autotuning to just Triton (#162688) Summary: Restricts subprocess benchmarking to only `TritonTemplateCaller`, which is expected by the underlying `target` method. THhis triggered a bug with large K shapes because the decompose k is `SubgraphChoiceCaller`. 
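As a schematic of the new routing (class names from `torch/_inductor/select_algorithm.py`; `choices` is the list of `ChoiceCaller`s being autotuned), only Triton template callers go to the subprocess benchmarker, while everything else, including the `SubgraphChoiceCaller` produced by decompose_k, stays in the current process:
```
triton, extern = [], []
for c in choices:
    if isinstance(c, TritonTemplateCaller):
        triton.append(c)   # safe to benchmark in a subprocess
    else:
        extern.append(c)   # ATen/extern and subgraph choices run in-process
```
The previous check (`not isinstance(c, ExternKernelCaller)`) misrouted `SubgraphChoiceCaller`, which is neither an extern kernel nor a Triton template, into the subprocess path.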
Test Plan: mm autotuning with a large k and `TORCHINDUCTOR_AUTOTUNE_IN_SUBPROC=1` Rollback Plan: Differential Revision: D82181924 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162688 Approved by: https://github.com/PaulZhang12, https://github.com/eellison, https://github.com/mlazos --- torch/_inductor/select_algorithm.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index b76eb8cf9f1a4..b2bdef0f7ef8d 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -3032,8 +3032,13 @@ def benchmark_in_sub_process( # only benchmark triton kernel in sub process for now. # ATen/Extern kernel are still benchmarked in the current process. - extern = [c for c in choices if isinstance(c, ExternKernelCaller)] - triton = [c for c in choices if not isinstance(c, ExternKernelCaller)] + extern = [] + triton = [] + for c in choices: + if isinstance(c, TritonTemplateCaller): + triton.append(c) + else: + extern.append(c) timings = cls.benchmark_in_current_process( extern, input_nodes, layout, input_gen_fns, hint_override=hint_override From 62843c14bbf694f5722fd6e1075da4792507fe42 Mon Sep 17 00:00:00 2001 From: Aaryaman Vasishta Date: Thu, 11 Sep 2025 22:35:09 +0000 Subject: [PATCH 126/693] [ROCm/Windows] Support aotriton for scaled_dot_product_attention on Windows. (#162330) Enables flash attention and/or memory efficient attention on Windows with scaled_dot_product_attention via. aotriton. Already tested to be working on Windows with TheRock. Steps to enable: simply set `USE_FLASH_ATTENTION=1` and `USE_MEM_EFF_ATTENTION=1` as usual. See https://github.com/ROCm/TheRock/blob/main/external-builds/pytorch/build_prod_wheels.py#L578-L604 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162330 Approved by: https://github.com/xinyazhang, https://github.com/ScottTodd, https://github.com/jeffdaily Co-authored-by: Scott Todd --- CMakeLists.txt | 4 +- .../native/transformers/cuda/attention.cu | 66 ++++++++++ .../transformers/hip/flash_attn/flash_api.h | 39 +----- cmake/External/aotriton.cmake | 113 +++++++++++++++++- tools/linter/dictionary.txt | 1 + 5 files changed, 179 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a43e0da8f2ea..efad5419aaffa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -874,7 +874,7 @@ cmake_dependent_option( "Whether to build the flash_attention kernel for scaled dot product attention.\ Will be disabled if not supported by the platform" ON - "USE_CUDA OR USE_ROCM;NOT MSVC" + "USE_CUDA OR USE_ROCM" OFF) cmake_dependent_option( @@ -909,7 +909,7 @@ cmake_dependent_option( # USE_FLASH_ATTENTION -> USE_ROCM -> Dependencies.cmake -> aotriton.cmake # if(USE_ROCM) - if(UNIX AND (USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION)) + if(USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION) include(cmake/External/aotriton.cmake) endif() endif() diff --git a/aten/src/ATen/native/transformers/cuda/attention.cu b/aten/src/ATen/native/transformers/cuda/attention.cu index b8b43e0086c1a..c2193f2378dd5 100644 --- a/aten/src/ATen/native/transformers/cuda/attention.cu +++ b/aten/src/ATen/native/transformers/cuda/attention.cu @@ -95,6 +95,72 @@ #endif #endif +#if defined(USE_ROCM) && (defined(USE_FLASH_ATTENTION) || defined(USE_MEM_EFF_ATTENTION)) +namespace pytorch_flash +{ +std::tuple< + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor> +mha_fwd( + const at::Tensor& 
q, // batch_size x seqlen_q x num_heads x head_size + const at::Tensor& k, // batch_size x seqlen_k x num_heads_k x head_size + const at::Tensor& v, // batch_size x seqlen_k x num_heads_k x head_size + std::optional& + out_, // batch_size x seqlen_q x num_heads x head_size + std::optional& + alibi_slopes_, // num_heads or batch_size x num_heads + const float p_dropout, + const float softmax_scale, + bool is_causal, + std::optional window_size_left, + std::optional window_size_right, + const float softcap, + const bool return_softmax, + std::optional gen_) { +#if defined(USE_ROCM_CK_SDPA) + if (at::globalContext().getROCmFAPreferredBackend() == + at::ROCmFABackend::Ck) { + const int non_null_window_left = window_size_left.value_or(-1); + const int non_null_window_right = window_size_right.value_or(-1); + std::optional dummy_attn_bias = std::nullopt; + return mha_fwd_ck( + q, + k, + v, + out_, + p_dropout, + softmax_scale, + is_causal, + non_null_window_left, + non_null_window_right, + return_softmax, + gen_, + dummy_attn_bias); // Not used in flash attention + } +#endif + return mha_fwd_aot( + q, + k, + v, + out_, + alibi_slopes_, + p_dropout, + softmax_scale, + is_causal, + window_size_left, + window_size_right, + return_softmax, + gen_); +} +} +#endif + namespace at { namespace cuda::philox { diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h b/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h index f6f2240d4f091..71a1959065970 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h +++ b/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h @@ -270,7 +270,7 @@ std::tuple mha_varle #endif TORCH_API -inline std::tuple< +std::tuple< at::Tensor, at::Tensor, at::Tensor, @@ -294,42 +294,7 @@ mha_fwd( std::optional window_size_right, const float softcap, const bool return_softmax, - std::optional gen_) { -#if defined(USE_ROCM_CK_SDPA) - if (at::globalContext().getROCmFAPreferredBackend() == - at::ROCmFABackend::Ck) { - const int non_null_window_left = window_size_left.value_or(-1); - const int non_null_window_right = window_size_right.value_or(-1); - std::optional dummy_attn_bias = std::nullopt; - return mha_fwd_ck( - q, - k, - v, - out_, - p_dropout, - softmax_scale, - is_causal, - non_null_window_left, - non_null_window_right, - return_softmax, - gen_, - dummy_attn_bias); // Not used in flash attention - } -#endif - return mha_fwd_aot( - q, - k, - v, - out_, - alibi_slopes_, - p_dropout, - softmax_scale, - is_causal, - window_size_left, - window_size_right, - return_softmax, - gen_); -} + std::optional gen_); inline std::tuple< at::Tensor, diff --git a/cmake/External/aotriton.cmake b/cmake/External/aotriton.cmake index 5d91587746540..4f7a79a78bfc6 100644 --- a/cmake/External/aotriton.cmake +++ b/cmake/External/aotriton.cmake @@ -45,13 +45,88 @@ if(NOT __AOTRITON_INCLUDED) ) set(__AOTRITON_BASE_URL "https://github.com/ROCm/aotriton/releases/download/") # @lint-ignore set(__AOTRITON_Z "gz") + # Set the default __AOTRITON_LIB path + set(__AOTRITON_LIB "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so") + if(WIN32) + set(__AOTRITON_LIB "${__AOTRITON_INSTALL_DIR}/lib/aotriton_v2.lib") + endif() + + function(aotriton_build_windows_dependencies dlfcn-win32_external xz_external dlfcn-win32_DIR liblzma_DIR) + # Windows-specific dependencies - build these first + if(NOT noimage) + message(FATAL_ERROR "noimage must be ON for Windows builds") + endif() + # Build dlfcn-win32 + set(__DLFCN_WIN32_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/dlfcn-win32") + 
set(__DLFCN_WIN32_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/dlfcn-win32-install") + + ExternalProject_Add(${dlfcn-win32_external} + GIT_REPOSITORY https://github.com/dlfcn-win32/dlfcn-win32.git + GIT_TAG v1.4.2 + PREFIX ${__DLFCN_WIN32_PREFIX} + INSTALL_DIR ${__DLFCN_WIN32_INSTALL_DIR} + CMAKE_ARGS + -DCMAKE_INSTALL_PREFIX=${__DLFCN_WIN32_INSTALL_DIR} + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=cl + -DCMAKE_CXX_COMPILER=cl + -DBUILD_SHARED_LIBS=ON + -DBUILD_TESTS=OFF + BUILD_BYPRODUCTS + "${__DLFCN_WIN32_INSTALL_DIR}/lib/dl.lib" + "${__DLFCN_WIN32_INSTALL_DIR}/bin/dl.dll" + ) + ExternalProject_Add_Step(${dlfcn-win32_external} copy_to_aotriton + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${__DLFCN_WIN32_INSTALL_DIR}/bin/dl.dll" + "${__AOTRITON_INSTALL_DIR}/lib/" + DEPENDEES install + ) + set(${dlfcn-win32_DIR} "${__DLFCN_WIN32_INSTALL_DIR}/share/dlfcn-win32" CACHE PATH "Path to dlfcn-win32 CMake config" FORCE) + + # Build xz/liblzma + set(__XZ_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/xz") + set(__XZ_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/xz-install") + + ExternalProject_Add(${xz_external} + GIT_REPOSITORY https://github.com/tukaani-project/xz.git + GIT_TAG v5.8.1 + PREFIX ${__XZ_PREFIX} + INSTALL_DIR ${__XZ_INSTALL_DIR} + CMAKE_ARGS + -DCMAKE_INSTALL_PREFIX=${__XZ_INSTALL_DIR} + -DCMAKE_BUILD_TYPE=Release + -DBUILD_SHARED_LIBS=ON + -DENABLE_NLS=OFF + -DXZ_TOOL_LZMAINFO=OFF + -DXZ_TOOL_XZ=OFF + -DXZ_TOOL_XZDEC=OFF + -DXZ_TOOL_LZMADEC=OFF + BUILD_BYPRODUCTS + "${__XZ_INSTALL_DIR}/lib/lzma.lib" + "${__XZ_INSTALL_DIR}/bin/liblzma.dll" + ) + ExternalProject_Add_Step(${xz_external} copy_to_aotriton + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${__XZ_INSTALL_DIR}/bin/liblzma.dll" + "${__AOTRITON_INSTALL_DIR}/lib/" + DEPENDEES install + ) + set(${liblzma_DIR} "${__XZ_INSTALL_DIR}/lib/cmake/liblzma" CACHE PATH "Path to xz/liblzma CMake config" FORCE) + endfunction() + function(aotriton_build_from_source noimage project) if(noimage) SET(RECURSIVE "OFF") else() SET(RECURSIVE "ON") endif() + if(WIN32) + message(STATUS "Building AOTriton Windows dependencies") + aotriton_build_windows_dependencies(dlfcn-win32_external xz_external dlfcn-win32_DIR liblzma_DIR) + endif() message(STATUS "PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}") + ExternalProject_Add(${project} GIT_REPOSITORY https://github.com/ROCm/aotriton.git GIT_SUBMODULES_RECURSE ${RECURSIVE} @@ -65,12 +140,19 @@ if(NOT __AOTRITON_INCLUDED) -DAOTRITON_GPU_BUILD_TIMEOUT=0 -DAOTRITON_NO_PYTHON=ON -DAOTRITON_NOIMAGE_MODE=${noimage} - BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so" + -DHIP_PLATFORM=amd + $<$:-Ddlfcn-win32_DIR=${dlfcn-win32_DIR}> + $<$:-Dliblzma_DIR=${liblzma_DIR}> + BUILD_BYPRODUCTS + "${__AOTRITON_LIB}" USES_TERMINAL_DOWNLOAD TRUE USES_TERMINAL_CONFIGURE TRUE USES_TERMINAL_BUILD TRUE USES_TERMINAL_INSTALL TRUE ) + if(WIN32) + add_dependencies(${project} dlfcn-win32_external xz_external) + endif() endfunction() set(__AOTRITON_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR}) @@ -95,7 +177,7 @@ if(NOT __AOTRITON_INCLUDED) INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_BINARY_DIR}/aotriton_runtime" "${__AOTRITON_INSTALL_DIR}" - BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so" + BUILD_BYPRODUCTS "${__AOTRITON_LIB}" ) message(STATUS "Using AOTriton Runtime from pre-compiled binary ${__AOTRITON_URL}.\ Set env variables AOTRITON_INSTALL_FROM_SOURCE=1 to build from source.") @@ -111,14 +193,35 @@ if(NOT __AOTRITON_INCLUDED) string(CONCAT __AOTRITON_URL "${__AOTRITON_BASE_URL}" 
"${__AOTRITON_VER}/${__AOTRITON_FILE}") + + # Set up directories + set(__AOTRITON_DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_download-${image}) + set(__AOTRITON_EXTRACT_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image}) + set(__AOTRITON_INSTALL_SOURCE_DIR ${__AOTRITON_EXTRACT_DIR}) + set(__DOWNLOAD_NO_EXTRACT "") + set(__BUILD_COMMANDS "") + + # On Windows, we need custom tar extraction with UTF-8 support + if(WIN32) + set(__DOWNLOAD_NO_EXTRACT "DOWNLOAD_NO_EXTRACT;TRUE") + set(__BUILD_COMMANDS + COMMAND ${CMAKE_COMMAND} -E make_directory "${__AOTRITON_EXTRACT_DIR}" + COMMAND tar --options hdrcharset=UTF-8 -xf "${__AOTRITON_DOWNLOAD_DIR}/${__AOTRITON_FILE}" -C "${__AOTRITON_EXTRACT_DIR}" + ) + set(__AOTRITON_INSTALL_SOURCE_DIR ${__AOTRITON_EXTRACT_DIR}/aotriton) + endif() + ExternalProject_Add(${project} URL "${__AOTRITON_URL}" URL_HASH SHA256=${__AOTRITON_SHA256} - SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image} + DOWNLOAD_DIR ${__AOTRITON_DOWNLOAD_DIR} + ${__DOWNLOAD_NO_EXTRACT} + SOURCE_DIR ${__AOTRITON_EXTRACT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" + ${__BUILD_COMMANDS} INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory - "${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image}" + "${__AOTRITON_INSTALL_SOURCE_DIR}" "${__AOTRITON_INSTALL_DIR}" BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/aotriton.images/${image}/__signature__" @@ -164,7 +267,7 @@ if(NOT __AOTRITON_INCLUDED) endforeach() endforeach() endif() - target_link_libraries(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so) + target_link_libraries(__caffe2_aotriton INTERFACE ${__AOTRITON_LIB}) target_include_directories(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/include) set(AOTRITON_FOUND TRUE) endif() # __AOTRITON_INCLUDED diff --git a/tools/linter/dictionary.txt b/tools/linter/dictionary.txt index 706881a8f10f6..c4a250db04836 100644 --- a/tools/linter/dictionary.txt +++ b/tools/linter/dictionary.txt @@ -12,6 +12,7 @@ BU contiguities contiguity coo +DEPENDEES deser din dout From a3f01f6418667f791f36d928f7e912eb89be2e67 Mon Sep 17 00:00:00 2001 From: Nakul Iyer Date: Thu, 11 Sep 2025 22:47:03 +0000 Subject: [PATCH 127/693] [MTIA Runtime] Add foreach_div ops to native_functions.yaml (#162732) Summary: Quick fix for runtime support on foreach_div, see D81274963. Fixed an issue that I created in that diff so that the CIs pass. Test Plan: CIs created in D81274963 and D81286593 pass. Added some logs in [aten_mtia_ops.py](https://www.internalfb.com/code/fbsource/[c56272ba042c43c65517dcac254364cf732fcfa9]/fbcode/mtia/host_runtime/torch_mtia/aten_mtia_ops.cpp?lines=3676) to all the foreach_div ops. We can see that the correct MTIA kernels are being invoked in the tests. 
https://www.internalfb.com/intern/testinfra/testrun/15481123829281588 Rollback Plan: Differential Revision: D82161434 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162732 Approved by: https://github.com/danielhou0515 --- aten/src/ATen/native/native_functions.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index 65ed5f402c2a3..3cf6f66468544 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -10699,6 +10699,7 @@ dispatch: CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow CUDA: foreach_tensor_div_list_kernel_cuda + MTIA: foreach_tensor_div_list_kernel_mtia - func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> () device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices @@ -10706,6 +10707,7 @@ dispatch: CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow_ CUDA: foreach_tensor_div_list_kernel_cuda_ + MTIA: foreach_tensor_div_list_kernel_mtia_ autogen: _foreach_div.List_out - func: _foreach_div.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[] @@ -10729,6 +10731,7 @@ dispatch: CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow CUDA: foreach_tensor_div_tensor_kernel_cuda + MTIA: foreach_tensor_div_tensor_kernel_mtia - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> () device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices @@ -10736,6 +10739,7 @@ dispatch: CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow_ CUDA: foreach_tensor_div_tensor_kernel_cuda_ + MTIA: foreach_tensor_div_tensor_kernel_mtia_ autogen: _foreach_div.Tensor_out - func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[] From 429052f151959d524fc98c9c8b675f1dfde49f38 Mon Sep 17 00:00:00 2001 From: hanchchch Date: Thu, 11 Sep 2025 23:07:54 +0000 Subject: [PATCH 128/693] fix: raise value error on init ParametrizationList if original.device != new.device (#162717) raise value error on init `ParametrizationList`, if `original.device != new.device`. currently `_maybe_set` will throw below error in such situations, which I think it's not convenient to debug. ``` [rank1]: RuntimeError: Attempted to set the storage of a tensor on device "cuda:1" to a storage on different device "cpu". This is no longer allowed; the devices must match. 
``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162717 Approved by: https://github.com/lezcano --- test/nn/test_parametrization.py | 16 ++++++++++++++++ torch/nn/utils/parametrize.py | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/test/nn/test_parametrization.py b/test/nn/test_parametrization.py index eb1f7c982b7ca..da83ed26caaca 100644 --- a/test/nn/test_parametrization.py +++ b/test/nn/test_parametrization.py @@ -593,6 +593,22 @@ def right_inverse(self, w): parametrize.register_parametrization(module, "weight", ChangeDtypeInverse()) self.assertFalse(parametrize.is_parametrized(module)) + class ChangeDeviceInverse(nn.Module): + def forward(self, x): + return x.float() + + def right_inverse(self, w): + return w.to(torch.device("meta")) + + # For parametrizations that return one tensor, right_inverse may not change the device + with self.assertRaisesRegex( + ValueError, "outputs one tensor, it may not change the device" + ): + parametrize.register_parametrization( + module, "weight", ChangeDeviceInverse() + ) + self.assertFalse(parametrize.is_parametrized(module)) + # Doesn't return a tensor class NotTensor(nn.Module): def forward(self, x): diff --git a/torch/nn/utils/parametrize.py b/torch/nn/utils/parametrize.py index 11f7106b31491..25de247c6df68 100644 --- a/torch/nn/utils/parametrize.py +++ b/torch/nn/utils/parametrize.py @@ -185,6 +185,14 @@ def __init__( f"original.dtype: {original.dtype}\n" f"right_inverse(original).dtype: {new.dtype}" ) + + if original.device != new.device: + raise ValueError( + "When `right_inverse` outputs one tensor, it may not change the device.\n" + f"original.device: {original.device}\n" + f"right_inverse(original).device: {new.device}" + ) + # Set the original to original so that the user does not need to re-register the parameter # manually in the optimiser with torch.no_grad(): From 8e217a9f6dc81e3d12697b04c3e611d82d9d866a Mon Sep 17 00:00:00 2001 From: Zhengxu Chen Date: Thu, 11 Sep 2025 23:09:52 +0000 Subject: [PATCH 129/693] [precompile] Fix issues with guard serialization on distributed types. (#162418) Summary: Add more support for torch internal distributed data structures. 
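Concretely, the serializer now tolerates values such as `ProcessGroup`, c10d `Work` objects, and the dynamically created `FSDPModule` subclasses produced by `fully_shard` appearing in the traced scope. A minimal sketch of the kind of program this covers (mirrors the FSDP test added below; `SimpleModule` is the test helper module, and a process group is assumed to be initialized already):
```
import torch
from torch.distributed._tensor import distribute_tensor, Replicate
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.fsdp import fully_shard

mesh = init_device_mesh("cuda", (1,))
m = fully_shard(SimpleModule(), mesh=mesh)   # type(m) is a generated FSDPModule subclass
x = distribute_tensor(torch.randn(3, 2), mesh, [Replicate()])
# Guards recorded while compiling m(x) reference these distributed types; the new
# reducer_override cases map them to reconstructible forms instead of failing to pickle.
```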
Test Plan: test_guard_serialization.py Rollback Plan: Differential Revision: D81927732 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162418 Approved by: https://github.com/dolpm --- test/dynamo/test_guard_serialization.py | 80 +++++++++++-- torch/_dynamo/guards.py | 113 ++++++++++++++++-- torch/_dynamo/utils.py | 3 +- .../fsdp/_fully_shard/_fully_shard.py | 5 + 4 files changed, 178 insertions(+), 23 deletions(-) diff --git a/test/dynamo/test_guard_serialization.py b/test/dynamo/test_guard_serialization.py index e826492089f63..7e19de2973431 100644 --- a/test/dynamo/test_guard_serialization.py +++ b/test/dynamo/test_guard_serialization.py @@ -26,6 +26,7 @@ from torch._dynamo.utils import dynamo_timed, get_metrics_context from torch._guards import compile_context, CompileContext, tracing from torch.overrides import TorchFunctionMode +from torch.testing._internal.common_utils import IS_MACOS from torch.testing._internal.inductor_utils import HAS_GPU from torch.utils import _pytree as pytree @@ -54,6 +55,14 @@ def __torch_function__(self, func, types, args=(), kwargs=None): return func(*args, **kwargs) +class MyClass: + def __getstate__(self): + raise RuntimeError("Cannot pickle") + + def add(self, x): + return x + 1 + + class SubclassWithMeta(torch.Tensor): @staticmethod def __new__(cls, a, extra, outer_size=None, outer_stride=None): @@ -235,17 +244,7 @@ def __hash__(self): pytree.register_constant(CustomConstantType) -@torch._dynamo.config.patch({"strict_precompile": True}) -class TestGuardSerialization(torch._inductor.test_case.TestCase): - def test_function_locals(self): - def foo(x): - return x + 1 - - def fn(x, g): - return g(x) + 1 - - self._test_serialization("TENSOR_MATCH", fn, torch.randn(3), foo) - +class TestGuardSerializationBase(torch._inductor.test_case.TestCase): def _tracefunc(self, frame, event, arg): if event != "call": return @@ -379,6 +378,18 @@ def _test_check_fn(self, ref, loaded, inputs, expected): self.assertEqual(ref.check(inputs), expected) self.assertEqual(ref.check(inputs), loaded.check(inputs)) + +@torch._dynamo.config.patch({"strict_precompile": True}) +class TestGuardSerialization(TestGuardSerializationBase): + def test_function_locals(self): + def foo(x): + return x + 1 + + def fn(x, g): + return g(x) + 1 + + self._test_serialization("TENSOR_MATCH", fn, torch.randn(3), foo) + def test_tensor_match(self): def f(x: torch.Tensor): return x + 1 @@ -1346,6 +1357,53 @@ def forward(self, x): ref, loaded = self._test_serialization("TENSOR_MATCH", m, torch.randn(3, 2)) self._test_check_fn(ref, loaded, {"self": m, "x": torch.randn(3, 2)}, True) + def test_bound_method_input(self): + class MyModule(torch.nn.Module): + def forward(self, foo, x): + return x + id(type(foo)) + + m = MyModule() + ref, loaded = self._test_serialization( + "TYPE_MATCH", m, MyClass().add, torch.randn(3, 2) + ) + self._test_check_fn( + ref, loaded, {"self": m, "foo": MyClass().add, "x": torch.randn(3, 2)}, True + ) + + +class SimpleModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.p = torch.nn.Parameter(torch.randn(3, 2)) + + def forward(self, x): + z = x + 1 + for p in self.parameters(): + z += p + return z + + +if not IS_MACOS: + from torch.testing._internal.common_fsdp import FSDPTestMultiThread + + @torch._dynamo.config.patch({"strict_precompile": True}) + class TestGuardSerializationFSDP(TestGuardSerializationBase, FSDPTestMultiThread): + def setUp(self): + TestGuardSerializationBase.setUp(self) + FSDPTestMultiThread.setUp(self) + + def 
test_guard_serialization_fsdp_module(self): + from torch.distributed._tensor import distribute_tensor, Replicate + from torch.distributed.device_mesh import init_device_mesh + from torch.distributed.fsdp import fully_shard + + mesh = init_device_mesh(str(torch.get_default_device()), (1,)) + m = SimpleModule() + m = fully_shard(m, mesh=mesh) + inputs = distribute_tensor(torch.randn(3, 2), mesh, [Replicate()]) + ref, loaded = self._test_serialization("TENSOR_MATCH", m, inputs) + self._test_check_fn(ref, loaded, {"self": m, "x": inputs}, True) + if __name__ == "__main__": from torch._dynamo.test_case import run_tests diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index 05e1311cf7ebd..d8c4271f614b3 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -1047,6 +1047,7 @@ def __init__( self.already_guarded_not_present_in_generic_dict: OrderedSet[ tuple[str, str] ] = OrderedSet() + self.guard_tree_values: dict[int, Any] = {} def guard_on_dict_keys_and_ignore_order( self, example_value: dict[Any, Any], guard: Guard @@ -1340,6 +1341,7 @@ def get_guard_manager_from_source(self, source: Source) -> GuardManager: if source_name != "": example_value = self.get(source_name) + self.guard_tree_values[id(example_value)] = example_value guard_manager_enum = self.get_guard_manager_type(source, example_value) @@ -3099,11 +3101,34 @@ class _Missing: pass +@functools.cache +def _get_unsupported_types() -> tuple[type, ...]: + # We only do ID_MATCH on C objects which is already banned from guards serialization. + ret: tuple[type, ...] = ( + types.CodeType, + torch._C.Stream, + weakref.ReferenceType, + ) + try: + ret += (torch._C._distributed_c10d.ProcessGroup,) + except AttributeError: + pass + return ret + + class GuardsStatePickler(pickle.Pickler): - def __init__(self, *args: Any, **kwargs: Any) -> None: + def __init__( + self, + guard_tree_values: dict[int, Any], + empty_values: dict[int, Any], + *args: Any, + **kwargs: Any, + ) -> None: super().__init__(*args, **kwargs) self.fake_mode = torch._subclasses.FakeTensorMode() self.tensor_converter = torch._subclasses.fake_tensor.FakeTensorConverter() + self.guard_tree_values = guard_tree_values + self.empty_values = empty_values @classmethod def _unpickle_module(cls, state: Any) -> torch.nn.Module: @@ -3175,6 +3200,18 @@ def _unpickle_mapping_proxy( ) -> types.MappingProxyType[Any, Any]: return types.MappingProxyType(d) + @classmethod + def _unpickle_dict_keys(cls, elems: list[Any]) -> Any: + return dict.fromkeys(elems).keys() + + @classmethod + def _unpickle_fsdp_module_type( + cls, original_type: type[torch.nn.Module] + ) -> type[torch.nn.Module]: + return torch.distributed.fsdp._fully_shard._fully_shard.get_cls_to_fsdp_cls()[ + original_type + ] + @classmethod def _unpickle_c_op(cls, name: str) -> Any: return getattr(torch.ops._C, name) @@ -3184,9 +3221,15 @@ def reducer_override( ) -> Union[tuple[Callable[..., Any], tuple[Any, ...]], Any]: import sympy + if id(obj) in self.empty_values: + return type(obj).__new__, (type(obj),) + if isinstance(obj, torch.Tensor) and obj.device.type != "meta": from torch.utils._python_dispatch import is_traceable_wrapper_subclass + if id(obj) not in self.guard_tree_values: + return _Missing, () + if is_traceable_wrapper_subclass(obj): # inner_data is a list of tuples of: # (inner attr name, unpickle func, tuple of func inputs) @@ -3196,6 +3239,8 @@ def reducer_override( # recursively call for inner tensor components for attr in attrs: inner = getattr(obj, attr) + if isinstance(inner, torch.Tensor): 
+ self.guard_tree_values[id(inner)] = inner func, args_tuple = self.reducer_override(inner) inner_data.append((attr, func, args_tuple)) @@ -3245,6 +3290,9 @@ def reducer_override( elif isinstance(obj, types.MappingProxyType): return type(self)._unpickle_mapping_proxy, (obj.copy(),) + elif isinstance(obj, torch._dynamo.utils.dict_keys): + return type(self)._unpickle_dict_keys, (list(obj),) + elif isinstance( obj, torch._ops.OpOverloadPacket ) and obj._qualified_op_name.startswith("_C::"): @@ -3257,14 +3305,18 @@ def reducer_override( # Skipping PyCapsule since there isn't much to be guarded about them. return _Missing, () - elif isinstance(obj, types.CodeType): - # We only do ID_MATCH on code objects which is already banned from guards serialization. + elif isinstance(obj, _get_unsupported_types()): return _Missing, () - elif inspect.isfunction(obj) and (obj.__code__.co_flags & inspect.CO_NESTED): - # Skipping nested function since CLOSURE_MATCH is banned from guards serialization. - assert obj.__qualname__ != obj.__name__ - return _Missing, () + elif inspect.isfunction(obj): + if obj.__code__.co_flags & inspect.CO_NESTED: + return _Missing, () + if obj.__module__ in sys.modules: + f = sys.modules[obj.__module__] + for name in obj.__qualname__.split("."): + f = getattr(f, name, None) # type: ignore[assignment] + if f is not obj: + return _Missing, () if type(obj).__qualname__ != type(obj).__name__: raise torch._dynamo.exc.PackageError( @@ -3273,12 +3325,44 @@ def reducer_override( + "Please define the class at global scope (top level of a module)." ) + if hasattr(torch.distributed, "distributed_c10d") and isinstance( + obj, torch.distributed.distributed_c10d.Work + ): + if id(obj) not in self.guard_tree_values: + return _Missing, () + + if ( + inspect.isclass(obj) + and hasattr(torch.distributed, "fsdp") + and issubclass(obj, torch.distributed.fsdp._fully_shard.FSDPModule) + ): + if obj is not torch.distributed.fsdp._fully_shard.FSDPModule: + original_type = obj.__mro__[2] + assert issubclass(original_type, torch.nn.Module) + assert ( + original_type + in torch.distributed.fsdp._fully_shard._fully_shard.get_cls_to_fsdp_cls() + ) + return type(self)._unpickle_fsdp_module_type, (original_type,) + return NotImplemented -def pickle_guards_state(state: GuardsState) -> bytes: +def pickle_guards_state(state: GuardsState, guard_tree_values: dict[int, Any]) -> bytes: buf = io.BytesIO() - pickler = GuardsStatePickler(buf) + empty_values = {} + + leaves = pytree.tree_leaves(state.output_graph.local_scope) + for leaf in leaves: + if inspect.ismethod(leaf) and hasattr(leaf, "__self__"): + base = leaf.__self__ + if id(base) not in guard_tree_values: + try: + type(base).__new__(type(base)) + empty_values[id(base)] = base + except: # noqa: E722, B001 + pass + pickler = GuardsStatePickler(guard_tree_values, empty_values, buf) try: pickler.dump(state) except AttributeError as e: @@ -3341,8 +3425,15 @@ def guard_filter_fn(guards: list[GuardFilterEntry]) -> list[bool]: if not keep: ret.append(False) elif ( - g.guard_type in ("ID_MATCH", "CLOSURE_MATCH", "WEAKREF_ALIVE") + g.guard_type + in ( + "ID_MATCH", + "CLOSURE_MATCH", + "WEAKREF_ALIVE", + "DICT_VERSION", + ) or "ID_MATCH" in g.derived_guard_types + or "DICT_VERSION" in g.derived_guard_types ): log.warning( "%s guard on %s is dropped with caching_precompile=True.", @@ -3631,7 +3722,7 @@ def _ref(x: Any) -> Any: shape_code_parts=self.shape_code_parts, ) - return pickle_guards_state(guards_state) + return pickle_guards_state(guards_state, 
builder.guard_tree_values) def build_guards( self, diff --git a/torch/_dynamo/utils.py b/torch/_dynamo/utils.py index 058a66cf5b772..9d3b6bcd43cf6 100644 --- a/torch/_dynamo/utils.py +++ b/torch/_dynamo/utils.py @@ -709,6 +709,7 @@ def _foo(...): if key not in compilation_time_metrics: compilation_time_metrics[key] = [] + metrics = compilation_time_metrics[key] event_metadata = {} if metadata: event_metadata.update(metadata) @@ -756,7 +757,7 @@ def _foo(...): finally: end_ns = time.time_ns() time_spent_ns = end_ns - start_ns - compilation_time_metrics[key].append(time_spent_ns / 1e9) + metrics.append(time_spent_ns / 1e9) chromium_log.log_event_end( event_name, end_ns, {}, start_ns, log_pt2_compile_event, compile_id ) diff --git a/torch/distributed/fsdp/_fully_shard/_fully_shard.py b/torch/distributed/fsdp/_fully_shard/_fully_shard.py index eb348a00f5f98..4b116e415042b 100644 --- a/torch/distributed/fsdp/_fully_shard/_fully_shard.py +++ b/torch/distributed/fsdp/_fully_shard/_fully_shard.py @@ -45,12 +45,17 @@ "FSDPModule", "UnshardHandle", "register_fsdp_forward_method", + "get_cls_to_fsdp_cls", ] cls_to_fsdp_cls: dict[type, type] = {} +def get_cls_to_fsdp_cls() -> dict[type, type]: + return cls_to_fsdp_cls + + @overload def fully_shard( module: nn.Module, From 92f9ed7ac37be57ec883e1cca798654b5b581607 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Thu, 11 Sep 2025 23:51:21 +0000 Subject: [PATCH 130/693] Revert "[2/N]Port several test files under test/distributed to Intel GPU (#159473)" This reverts commit fa1d409e83af93425a2672d62e134e8f20c5ccc0. Reverted https://github.com/pytorch/pytorch/pull/159473 on behalf of https://github.com/huydhn due to Sorry for reverting your change but it seems to break an distributed tests ([comment](https://github.com/pytorch/pytorch/pull/159473#issuecomment-3282999084)) --- test/distributed/test_c10d_common.py | 67 +++------ .../test_c10d_functional_native.py | 77 +++++----- test/distributed/test_device_mesh.py | 41 ++---- test/distributed/test_dynamo_distributed.py | 138 ++++++------------ test/distributed/test_inductor_collectives.py | 138 +++++++----------- test/distributed/test_store.py | 14 +- test/distributions/test_distributions.py | 39 ++--- test/inductor/test_snode_runtime.py | 18 +++ torch/distributed/distributed_c10d.py | 6 +- torch/testing/_internal/common_distributed.py | 29 ++-- .../testing/_internal/distributed/fake_pg.py | 2 +- 11 files changed, 233 insertions(+), 336 deletions(-) diff --git a/test/distributed/test_c10d_common.py b/test/distributed/test_c10d_common.py index 89afc369fe149..1857feffd9394 100644 --- a/test/distributed/test_c10d_common.py +++ b/test/distributed/test_c10d_common.py @@ -43,7 +43,6 @@ retry_on_connect_failures, run_tests, TEST_WITH_DEV_DBG_ASAN, - TEST_XPU, TestCase, ) from torch.utils.checkpoint import checkpoint @@ -64,8 +63,6 @@ torch.backends.cuda.matmul.allow_tf32 = False -device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" - def gpus_for_rank(world_size): """Multigpu tests are designed to simulate the multi nodes with multi @@ -73,9 +70,8 @@ def gpus_for_rank(world_size): On a single node, all visible GPUs are evenly divided to subsets, each process only uses a subset. 
""" - device_count = torch.accelerator.device_count() - visible_devices = list(range(device_count)) - gpus_per_process = device_count // world_size + visible_devices = list(range(torch.cuda.device_count())) + gpus_per_process = torch.cuda.device_count() // world_size gpus_for_rank = [] for rank in range(world_size): gpus_for_rank.append( @@ -405,7 +401,7 @@ def _prepare_multi_device_module( gradient_as_bucket_view=gradient_as_bucket_view, ) - input = torch.randn(global_batch_size, 2).to(devices[0]) + input = torch.randn(global_batch_size, 2).cuda(devices[0]) target = torch.randn(global_batch_size, 4) return model, ddp_model, input, target @@ -439,10 +435,10 @@ def _test_ddp_checkpointing( allow_none_grads=False, ): # to reproduce the same training results - torch.accelerator.set_device_index(self.rank) + torch.cuda.set_device(self.rank) torch.manual_seed(31415) - model = copy.deepcopy(input_model).to(device_type) - ddp_model = copy.deepcopy(input_model).to(device_type) + model = copy.deepcopy(input_model).cuda() + ddp_model = copy.deepcopy(input_model).cuda() ddp_model = nn.parallel.DistributedDataParallel( ddp_model, bucket_cap_mb=1, @@ -558,8 +554,8 @@ def __init__(self, use_reentrant=True): def _prepare_dummy_data(self): ddp_bs = 16 bs = ddp_bs * self.world_size - input = torch.rand((bs, 20), device=device_type, requires_grad=True) - target = torch.randn((bs, 20), device=device_type) + input = torch.rand((bs, 20), device="cuda", requires_grad=True) + target = torch.randn((bs, 20), device="cuda") offset = self.rank * ddp_bs ddp_input = input[offset : offset + ddp_bs] ddp_target = target[offset : offset + ddp_bs] @@ -719,7 +715,7 @@ def test_ddp_checkpointing_weight_sharing(self, use_reentrant): Test that checkpointing with weight sharing works. """ process_group = self._get_process_group() - torch.accelerator.set_device_index(self.rank) + torch.cuda.set_device(self.rank) for use_bucket_view, static_graph in product((False, True), (False, True)): torch.manual_seed(31415) l1 = nn.Linear(20, 20) @@ -742,7 +738,7 @@ def test_ddp_checkpointing_twice_weight_sharing(self): same layer twice and having weights shared across layers. 
""" process_group = self._get_process_group() - torch.accelerator.set_device_index(self.rank) + torch.cuda.set_device(self.rank) for use_bucket_view in (True, False): self._test_ddp_checkpointing( self.CheckpointTwiceModuleWeightSharing(), @@ -1166,7 +1162,7 @@ def _test_sequence_num_incremented(self, process_group, ranks): # Verify sequence numbers are appropriately incremented for i in range(10): - t = torch.ones(1, device=device_type) + t = torch.ones(1, device=torch.cuda.current_device()) dist.all_reduce(t, group=process_group) if not c10d._rank_not_in_group(process_group): seq_num = self._verify_sequence_number_across_pg( @@ -1197,7 +1193,7 @@ def _test_sequence_num_incremented(self, process_group, ranks): self.assertEqual(rank_to_seq_num[0] + 1, rank_to_seq_num[1]) def _test_sequence_num_incremented_default_group(self, backend_name): - torch.accelerator.set_device_index(self.rank) + torch.cuda.set_device(self.rank) store = dist.FileStore(self.file_name, self.world_size) dist.init_process_group( backend_name, @@ -1211,7 +1207,7 @@ def _test_sequence_num_incremented_default_group(self, backend_name): ) def _test_sequence_num_incremented_subgroup(self, backend_name): - torch.accelerator.set_device_index(self.rank) + torch.cuda.set_device(self.rank) store = dist.FileStore(self.file_name, self.world_size) dist.init_process_group( backend_name, @@ -1266,8 +1262,8 @@ def _test_warn_not_in_group(self, backend): in_group_ranks = list(filter(lambda x: x % 2 == 0, range(self.world_size))) group = dist.new_group(in_group_ranks) - x = torch.zeros(2, 2).to(self.rank) - xs = [torch.zeros(2, 2).to(self.rank) for _ in range(len(in_group_ranks))] + x = torch.zeros(2, 2).cuda(self.rank) + xs = [torch.zeros(2, 2).cuda(self.rank) for _ in range(len(in_group_ranks))] if self.rank not in in_group_ranks: msg = ".*{}.*does not belong to.*" with self.assertWarnsOnceRegex(UserWarning, msg.format("all_gather")): @@ -1396,7 +1392,7 @@ def _test_bool_tensors(self, backend): rank=self.rank, store=store, ) - device = "cuda" if backend == "nccl" else "xpu" if backend == "xccl" else "cpu" + device = "cuda" if backend == "nccl" else "cpu" # test alltoall_base tensor = torch.tensor([1, 0, 0, 1], dtype=torch.bool, device=device) zeros = torch.tensor([0, 0, 0, 0], dtype=torch.bool, device=device) @@ -1578,8 +1574,8 @@ def test_debug_level(self): class DummyWork(dist._Work): def wait(self, timeout=5.0): - if torch.accelerator.is_available(): - torch.accelerator.current_stream().synchronize() + if torch.cuda.is_available(): + torch.cuda.current_stream().synchronize() return True @@ -1794,18 +1790,6 @@ def test_backend_config(self): ("cpu:gloo,cuda:nccl", "cpu:gloo,cuda:nccl"), ] - if TEST_XPU: - # Override backend_config_strings_and_expected_values for Intel GPU. 
- backend_config_strings_and_expected_values[4:10] = [ - (dist.Backend.DUMMY, "cpu:dummy,cuda:dummy,xpu:dummy"), - ("DUMMY", "cpu:dummy,cuda:dummy,xpu:dummy"), - ("dummy", "cpu:dummy,cuda:dummy,xpu:dummy"), - ("cpu:dummy,xpu:dummy", "cpu:dummy,xpu:dummy"), - ("cpu:dummy,xpu:xccl", "cpu:dummy,xpu:xccl"), - ("cpu:gloo,xpu:dummy", "cpu:gloo,xpu:dummy"), - ("cpu:gloo,xpu:xccl", "cpu:gloo,xpu:xccl"), - ] - for config_str, expected_value in backend_config_strings_and_expected_values: with self.subTest(config_str): # ensures these configs strings are valid and no ValueError is raised @@ -1816,8 +1800,6 @@ def test_backend_config(self): invalid_backend_config_strings = [ "cpu:gloo,cuda:nccl,", # trailing comma "cpu:gloo,cuda:nccl,cpu:dummy", # duplicate device - "cpu:gloo,xpu:xccl,", # trailing comma - "cpu:gloo,xpu:xccl,cpu:dummy", # duplicate device ] for config_str in invalid_backend_config_strings: with self.subTest(config_str): @@ -1832,7 +1814,7 @@ def test_init_process_group_with_multiple_backends(self): os.environ["MASTER_ADDR"] = "localhost" os.environ["MASTER_PORT"] = "6789" dist.init_process_group( - "cpu:dummy,cuda:dummy,xpu:dummy", rank=self.rank, world_size=self.world_size + "cpu:dummy,cuda:dummy", rank=self.rank, world_size=self.world_size ) # test all_gather @@ -2071,7 +2053,7 @@ def _call_collective_with_varying_tensors(self, backend, collective, *args): # correctly dispatched # TODO: this will be updated in the future to not be backend specific - device = "cuda" if backend == "nccl" else "xpu" if backend == "xccl" else "cpu" + device = "cuda" if backend == "nccl" else "cpu" # ensure supported devices (cpu, cuda) succeeds during dispatch call tensor = torch.zeros(2, 2, device=torch.device(device)) # multi tensor collectives @@ -2137,7 +2119,7 @@ def _test_all_to_all_single(self, backend): rank=self.rank, store=store, ) - device = "cuda" if backend == "nccl" else "xpu" if backend == "xccl" else "cpu" + device = "cuda" if backend == "nccl" else "cpu" # test alltoall_base input_tensor = torch.ones(2, 2, device=torch.device(device)) output_tensor = torch.zeros(2, 2, device=torch.device(device)) @@ -2269,9 +2251,8 @@ def testNodeLocalRank(self): if __name__ == "__main__": - if device_type != "cpu": - assert not torch.get_device_module()._initialized, ( - "test_distributed must not have initialized {device_type} context on main process" - ) + assert not torch.cuda._initialized, ( + "test_distributed must not have initialized CUDA context on main process" + ) run_tests() diff --git a/test/distributed/test_c10d_functional_native.py b/test/distributed/test_c10d_functional_native.py index 930f034759395..bafc781b591c6 100644 --- a/test/distributed/test_c10d_functional_native.py +++ b/test/distributed/test_c10d_functional_native.py @@ -24,7 +24,7 @@ from torch.testing._internal.common_cuda import SM90OrLater from torch.testing._internal.common_distributed import ( MultiProcessTestCase, - requires_accelerator_dist_backend, + requires_nccl, skip_if_lt_x_gpu, ) from torch.testing._internal.common_utils import ( # type: ignore[attr-defined] @@ -59,7 +59,7 @@ def load_test_module(name): sys.exit(0) -@requires_accelerator_dist_backend(["nccl", "xccl"]) +@requires_nccl() class TestWithNCCL(MultiProcessTestCase): def setUp(self) -> None: super().setUp() @@ -75,15 +75,13 @@ def ranks(self) -> list[int]: @property def device(self) -> torch.device: - return torch.device(self.rank) + return torch.device(f"cuda:{self.rank}") def _init_process_group(self) -> None: - 
torch.accelerator.set_device_index(self.rank) + torch.cuda.set_device(self.device) store = dist.FileStore(self.file_name, self.world_size) - backend = dist.get_default_backend_for_device(self.device.type) - dist.init_process_group( - backend=backend, + backend="nccl", world_size=self.world_size, rank=self.rank, store=store, @@ -275,7 +273,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ) # check memory leak for i in range(1, 10): - mem_usage[i] = torch.accelerator.max_memory_allocated() + mem_usage[i] = torch.cuda.max_memory_allocated() compiled(arg) assert mem_usage[9] == mem_usage[8] @@ -372,16 +370,14 @@ def test_reduce_scatter_tensor_coalesced(self) -> None: @skip_if_lt_x_gpu(2) def test_all_to_all_single(self) -> None: self._init_process_group() - torch.accelerator.set_device_index(self.rank) + torch.cuda.set_device(self.device) torch.manual_seed(42) send_sz_matrix = torch.randint(0, 20, (self.world_size, self.world_size)) input_split_sizes = send_sz_matrix[self.rank].tolist() output_split_sizes = send_sz_matrix[:, self.rank].tolist() - input = torch.full((sum(input_split_sizes),), float(self.rank)).to( - self.device.type - ) + input = torch.full((sum(input_split_sizes),), float(self.rank)).cuda() output = torch.ops._c10d_functional.all_to_all_single( input, @@ -392,7 +388,7 @@ def test_all_to_all_single(self) -> None: output = torch.ops._c10d_functional.wait_tensor(output) expect = torch.cat( [ - torch.full((sz,), float(rank)).to(self.device.type) + torch.full((sz,), float(rank)).cuda() for rank, sz in enumerate(output_split_sizes) ] ) @@ -468,7 +464,7 @@ def test_unwaited(self) -> None: @fresh_cache() def test_threading(self): self._init_process_group() - device = self.device + device = torch.device(f"cuda:{self.rank}") def func(arg: torch.Tensor) -> torch.Tensor: buf0 = arg + 42 @@ -550,9 +546,9 @@ def fp8_rowwise_backward(in_, w, out_grad): return in_grad, w_grad m, n, k = 128, 256, 64 - in_ = torch.randn((m, k), device=self.device.type, dtype=torch.bfloat16) - w = torch.randn((n, k), device=self.device.type, dtype=torch.bfloat16) - out_grad = torch.randn((m, n), device=self.device.type, dtype=torch.bfloat16) + in_ = torch.randn((m, k), device="cuda", dtype=torch.bfloat16) + w = torch.randn((n, k), device="cuda", dtype=torch.bfloat16) + out_grad = torch.randn((m, n), device="cuda", dtype=torch.bfloat16) eager_in_grad, eager_w_grad = fp8_rowwise_backward(in_, w, out_grad) compile_in_grad, compile_w_grad = torch.compile(fp8_rowwise_backward)( @@ -781,8 +777,7 @@ def setUp(self): self.rank = 0 self.world_size = 2 - torch.accelerator.set_device_index(0) - self.device = torch.accelerator.current_accelerator() + torch.cuda.set_device("cuda:0") store = FakeStore() dist.init_process_group( @@ -808,7 +803,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ar1 = funcol.wait_tensor(ar1) return ar0, ar1 - arg = torch.rand(4, 4, device=self.device) + arg = torch.rand(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) @@ -841,7 +836,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.accelerator.synchronize() + torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -856,7 +851,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: ar1 = [funcol.wait_tensor(out) for out in ar1] return ar0, ar1 - args = [torch.rand(4, 4, device=self.device.type) for _ in range(2)] + args = [torch.rand(4, 4, device="cuda") for _ in range(2)] 
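# ---------------------------------------------------------------------------
# Editorial aside (not part of the patch): the _init_process_group hunk above
# shows the pattern this revert removes -- picking the c10d backend from the
# active accelerator instead of hard-coding "nccl". A minimal, hedged sketch of
# that pattern, assuming a PyTorch build that provides torch.accelerator and
# dist.get_default_backend_for_device (both appear in the code being reverted);
# init_pg_for_current_accelerator is a hypothetical helper name.
import torch
import torch.distributed as dist


def init_pg_for_current_accelerator(rank: int, world_size: int, store: dist.Store) -> str:
    acc = torch.accelerator.current_accelerator()
    device_type = acc.type if acc is not None else "cpu"
    if device_type != "cpu":
        # bind this process to one device, mirroring torch.cuda.set_device(rank)
        torch.accelerator.set_device_index(rank)
    # e.g. "nccl" for cuda, "xccl" for xpu, "gloo" for cpu
    backend = dist.get_default_backend_for_device(device_type)
    dist.init_process_group(backend=backend, rank=rank, world_size=world_size, store=store)
    return backend
# ---------------------------------------------------------------------------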
compiled = torch.compile(func) code = run_and_get_triton_code(compiled, args) buf0, buf1, buf2, buf3 = find_buffer_assignments(code) @@ -886,7 +881,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: # Test aoti out = AOTIRunnerUtil.run(func, (args,)) # noqa: F841 - torch.accelerator.synchronize() + torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -897,7 +892,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ar0 = funcol.wait_tensor(ar0) return ar0 - arg = torch.rand(4, 4, device=self.device.type) + arg = torch.rand(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) @@ -922,7 +917,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Expect allocation return ar0 - arg = torch.rand(4, 4, device=self.device.type).T + arg = torch.rand(4, 4, device="cuda").T compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) @@ -953,7 +948,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: buf2 = torch.mm(arg, buf1) return buf1, buf2 - arg = torch.rand(4, 4, device=self.device.type) + arg = torch.rand(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) buf0, buf1 = find_buffer_assignments(code) @@ -983,7 +978,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ag0 = funcol.wait_tensor(ag0) return ag0 - arg = torch.rand(4, 4, device=self.device.type) + arg = torch.rand(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) ( @@ -1000,7 +995,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.accelerator.synchronize() + torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1010,7 +1005,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: ag0 = [funcol.wait_tensor(out) for out in ag0] return ag0 - args = [torch.rand(4, 4, device=self.device.type) for _ in range(4)] + args = [torch.rand(4, 4, device="cuda") for _ in range(4)] compiled = torch.compile(func) code = run_and_get_triton_code(compiled, args) ( @@ -1034,7 +1029,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: # Test aoti out = AOTIRunnerUtil.run(func, (args,)) # noqa: F841 - torch.accelerator.synchronize() + torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "This is a GPU test!") @fresh_cache() @@ -1044,7 +1039,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: return funcol.wait_tensor(t) # Test aoti - arg = torch.rand(4, 4, device=self.device.type) + arg = torch.rand(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) ( @@ -1056,7 +1051,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.accelerator.synchronize() + torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1066,7 +1061,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: rs0 = funcol.wait_tensor(rs0) return rs0 - arg = torch.rand(4, 4, device=self.device.type) + arg = torch.rand(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) ( @@ -1082,7 +1077,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.accelerator.synchronize() + torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1094,7 +1089,7 @@ def 
func(args: list[torch.Tensor]) -> torch.Tensor: rs0 = [funcol.wait_tensor(out) for out in rs0] return rs0 - args = [torch.rand(4, 4, device=self.device.type) for _ in range(4)] + args = [torch.rand(4, 4, device="cuda") for _ in range(4)] compiled = torch.compile(func) code = run_and_get_triton_code(compiled, args) ( @@ -1118,7 +1113,7 @@ def func(args: list[torch.Tensor]) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (args,)) - torch.accelerator.synchronize() + torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1147,9 +1142,7 @@ def func( input_split_sizes = send_sz_matrix[self.rank] output_split_sizes = send_sz_matrix[:, self.rank].contiguous() - input = torch.full((input_split_sizes.sum().item(),), float(self.rank)).to( - self.device.type - ) + input = torch.full((input_split_sizes.sum().item(),), float(self.rank)).cuda() with torch._dynamo.config.patch( dynamic_shapes=True, @@ -1183,7 +1176,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: br1 = funcol.wait_tensor(br1) return br0, br1 - arg = torch.rand(4, 4, device=self.device.type) + arg = torch.rand(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, arg) @@ -1206,7 +1199,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: # Test aoti AOTIRunnerUtil.run(func, (arg,)) - torch.accelerator.synchronize() + torch.cuda.synchronize() @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @fresh_cache() @@ -1221,7 +1214,7 @@ def func(arg: torch.Tensor) -> torch.Tensor: ar1 = funcol.wait_tensor(ar1) return ar0, ar1 - arg = torch.rand(4, 4, device=self.device.type) + arg = torch.rand(4, 4, device="cuda") compiled = torch.compile(func, fullgraph=True) code = run_and_get_triton_code(compiled, arg) diff --git a/test/distributed/test_device_mesh.py b/test/distributed/test_device_mesh.py index 29b66ade63efd..98557c9fe941a 100644 --- a/test/distributed/test_device_mesh.py +++ b/test/distributed/test_device_mesh.py @@ -1,7 +1,6 @@ # Copyright (c) Meta Platforms, Inc. 
and affiliates # Owner(s): ["oncall: distributed"] import os -import unittest import torch import torch.distributed as dist @@ -27,7 +26,7 @@ ) from torch.distributed.tensor.placement_types import _Partial, Shard from torch.testing._internal.common_distributed import skip_if_lt_x_gpu -from torch.testing._internal.common_utils import run_tests, TEST_XPU +from torch.testing._internal.common_utils import run_tests from torch.testing._internal.distributed._tensor.common_dtensor import ( DTensorTestBase, with_comms, @@ -36,10 +35,6 @@ from torch.utils._typing_utils import not_none -device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" -device_count = torch.accelerator.device_count() - - def _set_env_var(addr="localhost", port="25364", world_size=1, rank=0, local_rank=-1): os.environ["MASTER_ADDR"] = addr os.environ["MASTER_PORT"] = port @@ -49,7 +44,6 @@ def _set_env_var(addr="localhost", port="25364", world_size=1, rank=0, local_ran os.environ["LOCAL_RANK"] = f"{local_rank}" -@unittest.skipIf(TEST_XPU, "XPU does not support gloo backend.") class DeviceMeshTestGlooBackend(DTensorTestBase): @property def backend(self): @@ -79,16 +73,14 @@ def test_manual_set_device(self): # Set the device on each process before DeviceMesh constructor, # and device to be different than the default world rank - torch.accelerator.set_device_index((self.rank + 2) % self.world_size) + torch.cuda.set_device((self.rank + 2) % self.world_size) _set_env_var(world_size=self.world_size, rank=self.rank) DeviceMesh(self.device_type, mesh_tensor) self.assertTrue(is_initialized()) # check that the device is set to the correct device # and respect the previous set_device calls - self.assertEqual( - torch.accelerator.current_device_idx(), (self.rank + 2) % self.world_size - ) + self.assertEqual(torch.cuda.current_device(), (self.rank + 2) % self.world_size) self.destroy_pg() @skip_if_lt_x_gpu(4) @@ -109,7 +101,7 @@ def test_auto_set_device_from_local_rank(self): # check that the device is set to the correct device # and respect the LOCAL_RANK env var - self.assertEqual(torch.accelerator.current_device_idx(), local_rank) + self.assertEqual(torch.cuda.current_device(), local_rank) self.destroy_pg() @skip_if_lt_x_gpu(4) @@ -128,7 +120,7 @@ def test_auto_set_device_from_heuristic(self): self.assertTrue(is_initialized()) # check that the device is set to the correct device - self.assertEqual(torch.accelerator.current_device_idx(), self.rank) + self.assertEqual(torch.cuda.current_device(), self.rank) self.destroy_pg() @@ -230,7 +222,7 @@ def test_get_local_rank(self): @with_comms def test_device_mesh_2d(self): mesh_tensor = torch.arange(4).reshape(2, 2) - # construct a device mesh for self.device_type + # construct a cuda device mesh mesh = DeviceMesh(self.device_type, mesh_tensor) # check all dim groups @@ -265,10 +257,10 @@ def test_device_mesh_init_backend(self): # we call init_backend we should make sure the default pg already created self.assertEqual(mesh.get_coordinate(), [5]) - @unittest.skipIf(not torch.accelerator.is_available(), "No accelerator available!") def test_fake_pg_device_mesh(self): fake_store = FakeStore() init_process_group("fake", store=fake_store, rank=0, world_size=self.world_size) + device_type = "cuda" if torch.cuda.is_available() else "cpu" mesh = DeviceMesh(device_type, torch.arange(self.world_size)) local_tensor = torch.randn(2, 8) @@ -308,7 +300,7 @@ def test_from_group_with_invalid_mesh(self): regex = r"Invalid mesh \[\[0, 1\], \[2, 3\]\] for ProcessGroup with ranks \[0, 
1, 2, 3\]" with self.assertRaisesRegex(ValueError, regex): DeviceMesh.from_group( - global_pg, device_type, invalid_mesh, mesh_dim_names=("dim0", "dim1") + global_pg, "cuda", invalid_mesh, mesh_dim_names=("dim0", "dim1") ) device_mesh = init_device_mesh(self.device_type, (2, 2)) @@ -328,11 +320,12 @@ def test_raises_invalid_device_type(self): # test init_device_mesh with an invalid device type that contains a GPU index mesh_shape = (2, self.world_size // 2) init_device_mesh( - f"{device_type}:0", mesh_shape=mesh_shape, mesh_dim_names=("dp", "tp") + "cuda:0", mesh_shape=mesh_shape, mesh_dim_names=("dp", "tp") ) @with_comms def test_set_mesh_dim_group_options(self): + device_type = "cuda" if torch.cuda.is_available() else "cpu" _mesh_resources._set_mesh_dim_group_options(1, "fake", None) mesh_tensor = torch.arange(4).reshape(2, 2) @@ -348,7 +341,7 @@ def world_size(self): @with_comms def test_device_mesh_nd(self): - # construct a device mesh for self.device_type + # construct a cuda device mesh mesh_tensor = torch.arange(8).reshape(2, 2, 2) mesh = DeviceMesh(self.device_type, mesh_tensor) @@ -717,9 +710,7 @@ def test_raises_invalid_mesh_dim_name(self): with self.assertRaisesRegex(KeyError, "Invalid mesh_dim_name"): mesh_dim_names = ("DP", "TP") mesh = init_device_mesh( - self.device_type, - (2, 4), - mesh_dim_names=mesh_dim_names, + self.device_type, (2, 4), mesh_dim_names=mesh_dim_names ) mesh[child_mesh_dim_name] @@ -947,9 +938,7 @@ def world_size(self): @with_comms def test_get_root_mesh(self): mesh_3d = init_device_mesh( - self.device_type, - (2, 2, 2), - mesh_dim_names=("dp", "cp", "tp"), + self.device_type, (2, 2, 2), mesh_dim_names=("dp", "cp", "tp") ) dp_cp_mesh = mesh_3d["dp", "cp"] @@ -997,9 +986,7 @@ def test_get_mesh_dim_by_name(self): @with_comms def test_get_all_submeshes(self): mesh_2d = init_device_mesh( - self.device_type, - (2, 4), - mesh_dim_names=("replicate", "shard"), + self.device_type, (2, 4), mesh_dim_names=("replicate", "shard") ) all_submeshes = _mesh_resources._get_all_submeshes(mesh_2d, "replicate") self.assertEqual(len(all_submeshes), 4) diff --git a/test/distributed/test_dynamo_distributed.py b/test/distributed/test_dynamo_distributed.py index af07e50435a81..d3436bbe47548 100644 --- a/test/distributed/test_dynamo_distributed.py +++ b/test/distributed/test_dynamo_distributed.py @@ -43,12 +43,11 @@ DynamoDistributedMultiProcTestCase, DynamoDistributedSingleProcTestCase, import_transformers_or_skip, - requires_accelerator_dist_backend, + requires_nccl, skip_if_lt_x_gpu, ) -from torch.testing._internal.common_utils import skipIfXpu +from torch.testing._internal.common_utils import requires_cuda from torch.testing._internal.inductor_utils import HAS_GPU -from torch.testing._internal.triton_utils import requires_cuda_and_triton def reset_rng_state(): @@ -271,15 +270,7 @@ def get_hf_bert(rank): except ImportError as e: raise unittest.SkipTest("Unable to import transformers") from e - device_type = ( - acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" - ) - batch_size, max_length, config, device = ( - 4, - 512, - BertConfig(), - f"{device_type}:{rank}", - ) + batch_size, max_length, config, device = 4, 512, BertConfig(), f"cuda:{rank}" model = AutoModelForMaskedLM.from_config(config).to(device) input_ids = torch.randint(0, config.vocab_size, (batch_size, max_length)).to(device) decoder_ids = torch.randint(0, config.vocab_size, (batch_size, max_length)).to( @@ -559,8 +550,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: # Are these tests 
failing? Check and see if TestFakeDistributedSingleProc has a # single process version; if it's just a problem in the Dynamo distributed -# # optimizer, you should be able to repro it single process! -@requires_accelerator_dist_backend(["nccl", "xccl"]) +# optimizer, you should be able to repro it single process! +@requires_nccl() class TestMultiProc(DynamoDistributedMultiProcTestCase): """ Note: MultiProcTestCase spawns processes per test and is slow. @@ -568,16 +559,12 @@ class TestMultiProc(DynamoDistributedMultiProcTestCase): sparingly for integration tests. """ - device_type = ( - acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" - ) - @skip_if_lt_x_gpu(2) @config.patch(optimize_ddp=False, enable_compiler_collectives=True) def test_ddp_baseline_aot_eager_multiprocess(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): self.assertFalse(config.optimize_ddp) - m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") + m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") m = DDP(m, device_ids=[self.rank]) m = torch.compile(m, backend="aot_eager") outputs = m(inputs) @@ -645,7 +632,7 @@ def forward(self, inp): with _dynamo_dist_per_rank_init(self.rank, self.world_size): self.assertFalse(config.optimize_ddp) - model = MyModel().to(device=self.device_type) + model = MyModel().to(device="cuda") # Activation checkpointing for Linear layers. non_reentrant_wrapper = functools.partial( @@ -660,7 +647,7 @@ def forward(self, inp): ) model = DDP(model) - x = torch.randn(10, 64).to(self.device_type) + x = torch.randn(10, 64).cuda() correct_outputs = model(x) opt_model = torch.compile(model) @@ -672,14 +659,14 @@ def forward(self, inp): def test_fsdp_aot_eager(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): # Test with basic FSDP wrapping (outer wrap around whole model) - m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") + m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="aot_eager") outputs = fsdp_m(inputs) self.assertTrue(same(correct_outputs, outputs)) # Test with recursive wrapping, nested FSDP around each Linear - m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") + m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") fsdp_m = FSDP( m, auto_wrap_policy=functools.partial( @@ -693,7 +680,6 @@ def test_fsdp_aot_eager(self): @skip_if_lt_x_gpu(2) @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") - @requires_cuda_and_triton def test_ddp_optimizer_cudagraph(self): class Net(nn.Module): def __init__(self): @@ -744,9 +730,7 @@ def test_fsdp_setattr(self): from torch._dynamo.utils import counters counters.clear() - m, inputs, correct_outputs = get_mutating_model( - f"{self.device_type}:{self.rank}" - ) + m, inputs, correct_outputs = get_mutating_model(f"cuda:{self.rank}") fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="eager", fullgraph=False) outputs = fsdp_m(inputs) @@ -764,9 +748,7 @@ def test_fsdp_unspecialized_forced_getattr_no_inline(self): from torch._dynamo.utils import counters counters.clear() - m, inputs, correct_outputs = get_forced_getattr_module( - f"{self.device_type}:{self.rank}" - ) + m, inputs, correct_outputs = get_forced_getattr_module(f"cuda:{self.rank}") fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="eager", fullgraph=False) outputs = fsdp_m(inputs) @@ -780,9 +762,7 @@ def 
test_fsdp_unspecialized_forced_getattr_inline(self): from torch._dynamo.utils import counters counters.clear() - m, inputs, correct_outputs = get_forced_getattr_module( - f"{self.device_type}:{self.rank}" - ) + m, inputs, correct_outputs = get_forced_getattr_module(f"cuda:{self.rank}") fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="eager", fullgraph=False) outputs = fsdp_m(inputs) @@ -794,14 +774,14 @@ def test_fsdp_unspecialized_forced_getattr_inline(self): def test_fsdp_inductor(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): # Test with basic FSDP wrapping (outer wrap around whole model) - m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") + m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") fsdp_m = FSDP(m, use_orig_params=True) fsdp_m = torch.compile(fsdp_m, backend="inductor") outputs = fsdp_m(inputs) self.assertTrue(same(correct_outputs, outputs)) # Test with recursive wrapping, nested FSDP around each Linear - m, inputs, correct_outputs = get_model(f"{self.device_type}:{self.rank}") + m, inputs, correct_outputs = get_model(f"cuda:{self.rank}") fsdp_m = FSDP( m, auto_wrap_policy=functools.partial( @@ -819,7 +799,7 @@ def test_fsdp_inductor(self): def test_fsdp_activation_checkpointing(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): model, inputs = get_toy_model_for_activation_checkpointing( - f"{self.device_type}:{self.rank}" + f"cuda:{self.rank}" ) is_inner = lambda module: isinstance(module, ToyInnerModel) # noqa: E731 wrap_policy = functools.partial(lambda_auto_wrap_policy, lambda_fn=is_inner) @@ -981,7 +961,7 @@ def test_compiler_collectives_automatic_dynamic_scalar(self): torch._dynamo.utils.clear_compilation_metrics() # TODO: This should be possible to do inside the function, but - device = f"{self.device_type}:{self.rank}" + device = f"cuda:{self.rank}" @torch.compile() def f(x, y): @@ -1201,7 +1181,7 @@ def test_get_pg_attr(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): pg = dist.distributed_c10d._get_default_group() - device = f"{self.device_type}:{self.rank}" + device = f"cuda:{self.rank}" @torch.compile(fullgraph=True) def f(x): @@ -1216,7 +1196,6 @@ def f(x): pg = dist.distributed_c10d.GroupMember.NON_GROUP_MEMBER self.assertEqual(f(x), x + 1) - @skipIfXpu # ProcessGroupXCCL doesn't support _set_default_timeout yet. @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @patch.object(torch._inductor.config, "fx_graph_cache", False) @patch.object(torch._inductor.config, "fx_graph_remote_cache", False) @@ -1226,7 +1205,7 @@ def test_asymmetric_compilation(self): with _dynamo_dist_per_rank_init(self.rank, self.world_size): torch._dynamo.utils.clear_compilation_metrics() - device = f"{self.device_type}:{self.rank}" + device = f"cuda:{self.rank}" pg = dist.distributed_c10d._get_default_group() @@ -1259,7 +1238,7 @@ def f(x): w = pg.allreduce(x) w.wait() - torch.accelerator.synchronize(device) + torch.cuda.synchronize(device) metrics = torch._dynamo.utils.get_compilation_metrics() # Number of compiles same on all nodes @@ -1268,7 +1247,6 @@ def f(x): for r in res[1:]: self.assertEqual(res[0], r) - @skipIfXpu # ProcessGroupXCCL doesn't support _set_default_timeout yet. 
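# ---------------------------------------------------------------------------
# Editorial aside (not part of the patch): the test_dynamo_distributed.py hunks
# in this revert consistently swap accelerator-generic calls back to their
# CUDA-specific counterparts. The pairs appearing throughout these diffs are,
# CUDA form (restored) vs generic form (removed):
#   torch.cuda.set_device(rank)        <- torch.accelerator.set_device_index(rank)
#   torch.cuda.current_device()        <- torch.accelerator.current_device_idx()
#   torch.cuda.device_count()          <- torch.accelerator.device_count()
#   torch.cuda.synchronize(device)     <- torch.accelerator.synchronize(device)
#   torch.cuda.current_stream()        <- torch.accelerator.current_stream()
#   torch.cuda.max_memory_allocated()  <- torch.accelerator.max_memory_allocated()
# A tiny guarded check of the generic spelling (a sketch, not test code):
import torch

if torch.accelerator.is_available():
    acc = torch.accelerator.current_accelerator()
    print(f"accelerator: {acc.type}, visible devices: {torch.accelerator.device_count()}")
# ---------------------------------------------------------------------------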
@unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @patch.object(torch._inductor.config, "fx_graph_cache", True) @patch.object(torch._inductor.config, "fx_graph_remote_cache", False) @@ -1280,7 +1258,7 @@ def test_asymmetric_compilation_with_fx_cache(self): with fresh_cache(), _dynamo_dist_per_rank_init(self.rank, self.world_size): torch._dynamo.utils.clear_compilation_metrics() - device = f"{self.device_type}:{self.rank}" + device = f"cuda:{self.rank}" pg = dist.distributed_c10d._get_default_group() @@ -1303,7 +1281,7 @@ def f(x): w = pg.allreduce(x) w.wait() - torch.accelerator.synchronize(device) + torch.cuda.synchronize(device) torch._dynamo.reset() if self.rank == 0: @@ -1320,11 +1298,11 @@ def f(x): w = pg.allreduce(x) w.wait() - torch.accelerator.synchronize(device) + torch.cuda.synchronize(device) -@requires_accelerator_dist_backend(["nccl", "xccl"]) -@unittest.skipUnless(torch.accelerator.is_available(), "Requires accelerator") +@requires_nccl() +@requires_cuda class TestSingleProc(DynamoDistributedSingleProcTestCase): """ Test harness initializes dist process group. @@ -1333,10 +1311,6 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase): Use TestMultiProc for things that really need to run on multiple nodes """ - device_type = ( - acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" - ) - def get_model( self, bsz=20, in_feat=10, hidden_feat=5000, out_feat=5, ctx_manager=None ): @@ -1454,7 +1428,6 @@ def opt_fn(inputs): self.assertEqual(len(break_reasons), 4) self.assertTrue(all("DDPOptimizer" in r.reason for r in break_reasons)) - @skipIfXpu # XPU device doesn't support flex_attention yet. @patch.object(config, "optimize_ddp", True) def test_compiled_flex_attention_full_model_ddp(self): class Model(torch.nn.Module): @@ -1501,16 +1474,16 @@ def alibi_score_mod(self, score, b, h, q_idx, kv_idx): S = 512 D = 64 + device = "cuda" model = Model(S, H, D) - model.to(self.device_type) + model.to(device) model = torch.compile(model) model = DDP(model, device_ids=self.device_ids) - hidden_states = torch.randn(B, S, H * D).to(self.device_type) + hidden_states = torch.randn(B, S, H * D).to(device) model(hidden_states) - torch.accelerator.synchronize() + torch.cuda.synchronize() - @skipIfXpu # XPU device doesn't support flex_attention yet. 
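# ---------------------------------------------------------------------------
# Editorial sketch (not part of the patch): the surrounding tests compile a
# DDP-wrapped module with small buckets so Dynamo's DDPOptimizer splits the
# graph at bucket boundaries (the "DDPOptimizer" break reasons asserted above).
# A minimal, self-contained CPU variant of that setup, under the assumption
# that a single-rank gloo group is enough to exercise the wrapping:
import os

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

os.environ.setdefault("MASTER_ADDR", "localhost")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)

model = torch.nn.Sequential(
    torch.nn.Linear(10, 5000), torch.nn.ReLU(), torch.nn.Linear(5000, 5)
)
ddp_model = DDP(model, bucket_cap_mb=1)  # small buckets -> several graph splits
compiled = torch.compile(ddp_model, backend="aot_eager")
out = compiled(torch.randn(20, 10))
dist.destroy_process_group()
# ---------------------------------------------------------------------------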
@patch.object(config, "optimize_ddp", True) def test_compiled_flex_attention_local_ddp(self): class Model(torch.nn.Module): @@ -1557,14 +1530,15 @@ def alibi_score_mod(self, score, b, h, q_idx, kv_idx): S = 512 D = 64 + device = "cuda" model = Model(S, H, D) - model.to(self.device_type) + model.to(device) model = torch.compile(model) model = DDP(model, device_ids=self.device_ids) - hidden_states = torch.randn(B, S, H * D).to(self.device_type) + hidden_states = torch.randn(B, S, H * D).to(device) model(hidden_states) - torch.accelerator.synchronize() + torch.cuda.synchronize() @patch.object(config, "optimize_ddp", True) @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @@ -1813,9 +1787,9 @@ def forward(self, x): a = torch.cos(a) return a - mod = MockModule().to(self.device_type) + mod = MockModule().cuda() mod = DDP(mod, bucket_cap_mb=1) - x = torch.randn(N, N, device=self.device_type, requires_grad=True) + x = torch.randn(N, N, device="cuda", requires_grad=True) args = (x,) backend = "aot_eager" @@ -1825,7 +1799,7 @@ def forward(self, x): def test_fsdp_orig_params_assert(self): # Test with basic FSDP wrapping (outer wrap around whole model) - m, inputs, _ = get_model(f"{self.device_type}:{self.rank}") + m, inputs, _ = get_model(f"cuda:{self.rank}") fsdp_m = FSDP(m, use_orig_params=False) # Test is that this function call does not throw an exception. fsdp_m = torch.compile(fsdp_m) @@ -1871,7 +1845,7 @@ def _(ctx): return out - device = f"{self.device_type}:{self.rank}" + device = f"cuda:{self.rank}" m = ToyModel( in_feat=10, hidden_feat=5000, @@ -1918,7 +1892,7 @@ def forward(self, inputs): torch._dynamo.reset() - device = f"{self.device_type}:{self.rank}" + device = f"cuda:{self.rank}" m = ToyModel( in_feat=10, hidden_feat=5000, @@ -1959,14 +1933,9 @@ def test_fsdp_dup_tensors_same_source(self): class DuplicateModule(nn.Module): def __init__(self) -> None: super().__init__() - device_type = ( - acc.type - if (acc := torch.accelerator.current_accelerator()) - else "cpu" - ) - self._param = torch.randn((3,), device=device_type) + self._param = torch.randn((3,), device="cuda") self._buf = torch.nn.Buffer( - torch.randn((3,), requires_grad=False, device=device_type) + torch.randn((3,), requires_grad=False, device="cuda") ) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -1979,7 +1948,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: model = DuplicateModule() fsdp_model = FSDP(copy.deepcopy(model), use_orig_params=True) fsdp_model = torch.compile(fsdp_model, backend="aot_eager") - inp = torch.randn((2, 3), device=self.device_type) + inp = torch.randn((2, 3), device="cuda") local_out = model(inp) fsdp_out = fsdp_model(inp) self.assertEqual(local_out, fsdp_out) @@ -1996,13 +1965,8 @@ def test_fsdp_dup_tensors_diff_source(self): class BufModule(nn.Module): def __init__(self) -> None: super().__init__() - device_type = ( - acc.type - if (acc := torch.accelerator.current_accelerator()) - else "cpu" - ) self._buf = nn.Buffer( - torch.randn((3,), requires_grad=False, device=device_type) + torch.randn((3,), requires_grad=False, device="cuda") ) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -2011,12 +1975,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class Model(nn.Module): def __init__(self) -> None: super().__init__() - device_type = ( - acc.type - if (acc := torch.accelerator.current_accelerator()) - else "cpu" - ) - self._param = nn.Parameter(torch.randn((1,), device=device_type)) + self._param = nn.Parameter(torch.randn((1,), 
device="cuda")) self._buf_module = BufModule() # Share the buffer, meaning same tensor but different source self._buf = self._buf_module._buf @@ -2033,7 +1992,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: fsdp_model = FSDP(Model(), use_orig_params=True) cnt = torch._dynamo.testing.CompileCounterWithBackend("aot_eager") fsdp_model = torch.compile(fsdp_model, backend=cnt) - inp = torch.randn((2, 3), device=self.device_type) + inp = torch.randn((2, 3), device="cuda") for _ in range(15): fsdp_model(inp) # Check for no recompiles (if there were incorrect de-dup guards, then @@ -2052,12 +2011,7 @@ def __init__(self, use_self: bool): super().__init__() self._use_self = use_self torch.manual_seed(42) # force `_param` to be deterministic - device_type = ( - acc.type - if (acc := torch.accelerator.current_accelerator()) - else "cpu" - ) - self._param = nn.Parameter(torch.randn((3,), device=device_type)) + self._param = nn.Parameter(torch.randn((3,), device="cuda")) def forward(self, x: torch.Tensor) -> torch.Tensor: if self._use_self: @@ -2072,7 +2026,7 @@ def _add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: return x + y model = ModuleWithStaticMethod(False) - x = torch.randn((2, 3), device=self.device_type) + x = torch.randn((2, 3), device="cuda") ref_out = model(x) test_outs: list[torch.Tensor] = [] diff --git a/test/distributed/test_inductor_collectives.py b/test/distributed/test_inductor_collectives.py index a0197eb89ebc9..4c14d497234f7 100644 --- a/test/distributed/test_inductor_collectives.py +++ b/test/distributed/test_inductor_collectives.py @@ -10,7 +10,6 @@ import torch._dynamo import torch._dynamo.logging import torch._dynamo.test_case -import torch.distributed as c10d # for some reason importing functional collectives after dynamo breaks collectives handling! 
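# ---------------------------------------------------------------------------
# Editorial sketch (not part of the patch): the test_inductor_collectives.py
# diffs that follow exercise torch.distributed._functional_collectives under
# torch.compile. A minimal single-process version of that shape, using the
# "fake" backend and the internal FakeStore test helper (both used elsewhere in
# this patch) so no real GPUs or NCCL are required:
import torch
import torch.distributed as dist
import torch.distributed._functional_collectives as funcol
from torch.testing._internal.distributed.fake_pg import FakeStore

dist.init_process_group("fake", store=FakeStore(), rank=0, world_size=2)


def fn(x):
    y = funcol.all_reduce(x, "sum", "0")  # "0" names the default group, as in the tests
    return funcol.wait_tensor(y) + 1


out = torch.compile(fn, backend="eager")(torch.ones(4, 4))
dist.destroy_process_group()
# ---------------------------------------------------------------------------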
import torch.distributed._functional_collectives as _functional_collectives @@ -38,14 +37,14 @@ DynamoDistributedMultiProcTestCase, DynamoDistributedSingleProcTestCase, MultiProcessTestCase, - requires_accelerator_dist_backend, + requires_nccl, skip_if_lt_x_gpu, ) from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, parametrize, + requires_cuda, skipIfRocm, - skipIfXpu, TEST_XPU, xfailIf, ) @@ -60,15 +59,13 @@ def _tolist_with_constrain_as_size(tensor): return lst -@requires_accelerator_dist_backend(["nccl", "xccl"]) +@requires_nccl() @instantiate_parametrized_tests class TestCollectivesMultiProc(DynamoDistributedMultiProcTestCase): """ Run correctness checks in multi-proc runner, mark with minimum # GPUs to run under """ - device = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" - def get_world_trs(self): return { "tag": "", @@ -105,11 +102,8 @@ def compile(func, example_inputs): example, **self.get_world_trs(), ) - t = torch.randn(4, 4, device=self.device) - inputs = ( - t if self.rank == 0 else torch.zeros(4, 4, device=self.device), - 0, - ) + t = torch.randn(4, 4, device="cuda") + inputs = (t if self.rank == 0 else torch.zeros(4, 4, device="cuda"), 0) eager_out = example(*inputs) self.assertTrue(same(t, eager_out)) @@ -143,7 +137,7 @@ def compile(func, example_inputs): matmul_cat_col, **self.get_world_trs(), ) - inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 6 + inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 6 eager_out = matmul_cat_col(*inputs) compiled_matmul_cat_col = compile(matmul_cat_col, inputs) @@ -185,7 +179,7 @@ def func(x): for nelem in [1024, 2048, 4096]: # CI (Tesla T4) does not support bfloat16 compilation natively, # using float - x = torch.randn(nelem, device=self.device, dtype=torch.float) + x = torch.randn(nelem, device="cuda", dtype=torch.float) golden_out = eager_func(x) for _ in range(3): @@ -223,8 +217,8 @@ def compile(func, example_inputs): eager_func, **self.get_world_trs(), ) - eager_inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 4 - inductor_inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 2 + eager_inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 4 + inductor_inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 2 eager_out = inductor_func(eager_func(*eager_inputs), *inductor_inputs) compiled_inductor_func = compile( @@ -262,8 +256,8 @@ def compile(func, example_inputs): inductor_func, **self.get_world_trs(), ) - inductor_inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 4 - eager_inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 2 + inductor_inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 4 + eager_inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 2 eager_out = eager_func(inductor_func(*inductor_inputs), *eager_inputs) compiled_inductor_func = compile(inductor_func, inductor_inputs) @@ -276,7 +270,6 @@ def compile(func, example_inputs): @skip_if_lt_x_gpu(2) @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1728 @skipIfRocm - @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1728 def test_eager_async_allreduce_inductor_wait(self): import torch.distributed as dist from torch._inductor.utils import run_and_get_code @@ -299,7 +292,7 @@ def all_reduce_wait(work, y): # potentially compiled return y * y with _dynamo_dist_per_rank_init(self.rank, self.world_size): - x = torch.ones(12800, 12800, device=self.device) + self.rank + x = torch.ones(12800, 12800, 
device="cuda") + self.rank self.assertEqual(torch._C._distributed_c10d._get_work_registry_size(), 0) # NOTE: We run for 10 iterations each, to ensure that the GPU execution is way behind CPU @@ -370,7 +363,7 @@ def func(a, *, tag, ranks, group_size): return (e,) with _dynamo_dist_per_rank_init(self.rank, self.world_size): - inputs = torch.ones(4, 4, device=self.device) + self.rank + inputs = torch.ones(4, 4, device="cuda") + self.rank compiled = torch.compile(func) out = compiled(inputs, **self.get_world_trs()) correct = func(inputs, **self.get_world_trs()) @@ -387,8 +380,7 @@ def func(tensor, src_dst_pairs, *, tag, ranks, group_size): with _dynamo_dist_per_rank_init(self.rank, self.world_size): inputs = ( # rank0: [0., 1.], rank1: [2., 3.] - torch.arange(2, dtype=torch.float32, device=self.device) - + 2 * self.rank, + torch.arange(2, dtype=torch.float32, device="cuda") + 2 * self.rank, [1, 0], ) compiled = torch.compile(func) @@ -397,7 +389,7 @@ def func(tensor, src_dst_pairs, *, tag, ranks, group_size): self.assertTrue(same(out, correct)) # rank0: [2., 3.], rank1: [0., 1.] - expected = torch.arange(2, dtype=torch.float32, device=self.device) + 2 * ( + expected = torch.arange(2, dtype=torch.float32, device="cuda") + 2 * ( (self.rank - 1 + self.world_size) % self.world_size ) self.assertEqual(out, expected) @@ -420,9 +412,9 @@ def forward(self, x, world_size, tag, ranks, group_size): return out with _dynamo_dist_per_rank_init(self.rank, self.world_size): - model = Model().to(self.device) + model = Model().cuda() model_compiled = torch.compile(model) - inp = torch.tensor([[2, 1, 3, 0]], dtype=torch.long, device=self.device) + inp = torch.tensor([[2, 1, 3, 0]], dtype=torch.long, device="cuda") out = model_compiled(inp, self.world_size, **self.get_world_trs()) correct = model(inp, self.world_size, **self.get_world_trs()) self.assertTrue(same(out, correct)) @@ -437,7 +429,7 @@ def func(tensor, world_size): with _dynamo_dist_per_rank_init(self.rank, self.world_size): func_compiled = torch.compile(func) - inp = torch.tensor(self.rank, dtype=torch.long, device=self.device) + inp = torch.tensor(self.rank, dtype=torch.long, device="cuda") out = func_compiled(inp, self.world_size) correct = func(inp, self.world_size) self.assertTrue(same(out, correct)) @@ -459,9 +451,9 @@ def forward(self, x, world_size, tag, ranks, group_size): return out with _dynamo_dist_per_rank_init(self.rank, self.world_size): - model = Model().to(self.device) + model = Model().cuda() model_compiled = torch.compile(model) - inp = torch.tensor([[2, 1, 3, 0]], dtype=torch.long, device=self.device) + inp = torch.tensor([[2, 1, 3, 0]], dtype=torch.long, device="cuda") out = model_compiled(inp, self.world_size, **self.get_world_trs()) correct = model(inp, self.world_size, **self.get_world_trs()) self.assertTrue(same(out, correct)) @@ -490,7 +482,7 @@ def compile(func, example_inputs): example, **self.get_world_trs(), ) - inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 2 + inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 2 eager_out = example(*inputs) compiled_matmul_cat_col = compile(example, inputs) @@ -517,7 +509,7 @@ def compile(func, example_inputs): example, **self.get_world_trs(), ) - inputs = (torch.ones(4, 4, device=self.device) + self.rank,) * 2 + inputs = (torch.ones(4, 4, device="cuda") + self.rank,) * 2 eager_out = example(*inputs) compiled_fn = compile(example, inputs) @@ -571,7 +563,7 @@ def example( dtype=torch.int64, ) inputs = ( - torch.ones(int(row), 5, device=self.device) * (self.rank + 
1), + torch.ones(int(row), 5, device="cuda") * (self.rank + 1), input_split_sizes_tensor, output_split_sizes_tensor, ) @@ -740,7 +732,7 @@ def example( dtype=torch.int64, ) inputs = ( - torch.ones(int(row), 5, device=self.device, requires_grad=True) + torch.ones(int(row), 5, device="cuda", requires_grad=True) * (self.rank + 1), input_split_sizes_tensor, output_split_sizes_tensor, @@ -803,7 +795,7 @@ def example(inp, *, tag, ranks, group_size): with _dynamo_dist_per_rank_init(self.rank, self.world_size): inputs = ( - torch.ones(self.world_size, self.world_size, device=self.device) + torch.ones(self.world_size, self.world_size, device="cuda") * (self.rank + 1), ) trs = self.get_world_trs() @@ -827,11 +819,8 @@ def example(inp, *, tag, ranks, group_size): @instantiate_parametrized_tests -@requires_accelerator_dist_backend(["nccl", "xccl"]) -@unittest.skipIf( - not torch.accelerator.is_available(), - "No accelerator is available", -) +@requires_nccl() +@requires_cuda class TestCollectivesInductor(DynamoDistributedSingleProcTestCase): """ Prefer single-proc test runner for basic tests as it is easier to work with. @@ -854,7 +843,7 @@ def func(inp, *, tag, ranks, group_size): ar = torch.ops.c10d_functional.wait_tensor(ar) return ar - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") compiled = torch.compile(func) out = compiled(inputs, **self.get_world_trs()) @@ -889,7 +878,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar, other - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs()) @@ -922,7 +911,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar, y, other - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs()) @@ -963,7 +952,7 @@ def func(inp): ar = _functional_collectives.all_reduce(inp, "sum", "0") return ar - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") counter = CompileCounter() compiled = torch.compile(func, backend=counter) out = compiled(inputs) @@ -974,13 +963,12 @@ def func(inp): self.assertEqual(counter.op_count, 2) self.assertTrue(same(out, correct)) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_trace_all_gather_tensor(self): def func(inp): ar = _functional_collectives.all_gather_tensor(inp, 0, "0") return ar - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") counter = CompileCounter() compiled = torch.compile(func, backend=counter) out = compiled(inputs) @@ -991,7 +979,6 @@ def func(inp): self.assertEqual(counter.op_count, 2) self.assertTrue(same(out, correct)) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_trace_all_gather_tensor_pg(self): def func(inp, *, pg): ar = _functional_collectives.all_gather_tensor(inp, 0, pg) @@ -1008,7 +995,6 @@ def func(inp, *, pg): self.assertEqual(counter.op_count, 2) self.assertTrue(same(out, correct)) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_rewrite_dist_all_gather(self): def func(inp, out, *, pg): torch.distributed.all_gather_into_tensor( @@ -1034,7 +1020,6 @@ def func(inp, out, *, pg): assert counter.op_count == 3 assert same(outputs, correct_outputs) - 
@skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_rewrite_dist_all_gather_list(self): def func(inp, out, *, pg): torch.distributed.all_gather( @@ -1057,7 +1042,6 @@ def func(inp, out, *, pg): assert counter.frame_count == 1 assert same(outputs, correct_outputs) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_rewrite_dist_all_gather_args_match(self): # Duplicated most of the structure from test_dynamo_rewrite_dist_all_gather # except uses kwargs to ensure rewrite has matching arg names @@ -1086,7 +1070,6 @@ def func(inp, out, *, pg): assert counter.op_count == 3 assert same(outputs, correct_outputs) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_rewrite_dist_reduce_scatter(self): def func(inp, out, *, pg): torch.distributed.reduce_scatter_tensor( @@ -1254,7 +1237,6 @@ def verify(gm, _): input = torch.ones(2, device=self.device) compiled(input) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_support_collective_op_with_async_op_False(self): def func(inp, out, *, pg): # user explicitly set the attribute `async_op` to False, @@ -1314,13 +1296,12 @@ def func(inp, *, pg): assert counter.op_count == 1 assert same(outputs, correct_outputs) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_trace_reduce_scatter_tensor(self): def func(inp): ar = _functional_collectives.reduce_scatter_tensor(inp, "sum", 0, "0") return ar - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") counter = CompileCounter() compiled = torch.compile(func, backend=counter) out = compiled(inputs) @@ -1331,7 +1312,6 @@ def func(inp): self.assertEqual(counter.op_count, 2) self.assertTrue(same(out, correct)) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 def test_dynamo_trace_allgather_coalesced(self): def func(inp, *, tag, ranks, group_size): ar = torch.ops.c10d_functional.all_gather_into_tensor_coalesced( @@ -1339,10 +1319,7 @@ def func(inp, *, tag, ranks, group_size): ) return ar - inputs = [ - torch.ones(4, 4, device=self.device), - torch.ones(6, 6, device=self.device), - ] + inputs = [torch.ones(4, 4, device="cuda"), torch.ones(6, 6, device="cuda")] counter = CompileCounter() compiled = torch.compile(func, backend=counter) out = compiled(inputs, **self.get_world_trs()) @@ -1362,7 +1339,7 @@ def func(inp): ar = _functional_collectives.all_reduce(inp, "sum", "0") return ar - input = torch.ones(4, 4, device=self.device, requires_grad=True) + input = torch.ones(4, 4, device="cuda", requires_grad=True) compiled = torch.compile( func, backend="aot_eager" ) # inductor bug with single-op allreduce graph @@ -1380,7 +1357,6 @@ def test_meta(self): out = torch.ops.c10d_functional.all_reduce(x, "sum", **self.get_world_trs()) self.assertEqual(x.size(), out.size()) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @torch._inductor.config.patch({"debug": True, "triton.descriptive_names": False}) def test_inductor_all_gather_coalesced(self): @@ -1400,7 +1376,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar0, y, other, ar1 - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs()) @@ -1427,7 +1403,6 @@ def func(inp, *, tag, ranks, group_size): correct = 
func(inputs, **self.get_world_trs()) assert same(out, correct), f"{out} va {correct}" - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") @torch._inductor.config.patch({"debug": True, "triton.descriptive_names": False}) def test_inductor_reduce_scatter_coalesced(self): @@ -1447,7 +1422,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar0, y, other, ar1 - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") compiled = torch.compile(func) code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs()) @@ -1474,7 +1449,6 @@ def func(inp, *, tag, ranks, group_size): correct = func(inputs, **self.get_world_trs()) assert same(out, correct), f"{out} va {correct}" - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") def test_reorder_peak_memory(self): """ @@ -1496,7 +1470,7 @@ def func(inp, *, tag, ranks, group_size): other = torch.ones_like(inp) + 22 return ar0, y, other, ar1 - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") # get stats directly from the internal helper without affecting the real pass's signature node_stats: Optional[dict[BaseSchedulerNode, ReorderInfo]] = None @@ -1668,10 +1642,10 @@ def func(x, w, ag_0, ag_1, *, tag, ranks, group_size): return y, ag_0_out, ag_1_out - x = torch.ones(4, 384, device=self.device, dtype=torch.float32) - w = torch.ones(384, 512, device=self.device, dtype=torch.float32) - ag_0 = torch.ones(384, 512, device=self.device, dtype=torch.float32) - ag_1 = torch.ones(384, 512, device=self.device, dtype=torch.float32) + x = torch.ones(4, 384, device="cuda", dtype=torch.float32) + w = torch.ones(384, 512, device="cuda", dtype=torch.float32) + ag_0 = torch.ones(384, 512, device="cuda", dtype=torch.float32) + ag_1 = torch.ones(384, 512, device="cuda", dtype=torch.float32) inputs = [x, w, ag_0, ag_1] with torch._inductor.config.patch( @@ -1836,12 +1810,12 @@ def func(x, w, ag_0, ag_1, ag_2, ag_3, *, tag, ranks, group_size): rs_3_out, ) - x = torch.ones(4, 384, device=self.device, dtype=torch.float32) - w = torch.ones(384, 512, device=self.device, dtype=torch.float32) - ag_0 = torch.ones(1024, 512, device=self.device, dtype=torch.float32) - ag_1 = torch.ones(512, 1024, device=self.device, dtype=torch.float32) - ag_2 = torch.ones(1024, 512, device=self.device, dtype=torch.float32) - ag_3 = torch.ones(512, 1024, device=self.device, dtype=torch.float32) + x = torch.ones(4, 384, device="cuda", dtype=torch.float32) + w = torch.ones(384, 512, device="cuda", dtype=torch.float32) + ag_0 = torch.ones(1024, 512, device="cuda", dtype=torch.float32) + ag_1 = torch.ones(512, 1024, device="cuda", dtype=torch.float32) + ag_2 = torch.ones(1024, 512, device="cuda", dtype=torch.float32) + ag_3 = torch.ones(512, 1024, device="cuda", dtype=torch.float32) inputs = [x, w, ag_0, ag_1, ag_2, ag_3] # get stats directly from the internal helper without affecting the real pass's signature @@ -1943,7 +1917,6 @@ def _reorder_communication_preserving_peak_memory( node_stat1 = next(it) self.assertTrue("collective ordering" in node_stat1.limiting_factor) - @skipIfXpu # https://github.com/intel/torch-xpu-ops/issues/1581 @unittest.skipIf(not HAS_GPU, "Inductor+gpu needs triton and recent GPU arch") def test_reorder_respects_wait_dep(self): """ @@ -1966,7 +1939,7 @@ def func(inp, *, tag, ranks, 
group_size): # ensure other is not incorrectly aliasing ar's buffer return ag_1_wait - inputs = torch.ones(4, 4, device=self.device) + inputs = torch.ones(4, 4, device="cuda") # get stats directly from the internal helper without affecting the real pass's signature node_stats: Optional[dict[BaseSchedulerNode, ReorderInfo]] = None @@ -2015,7 +1988,7 @@ def _reorder_communication_preserving_peak_memory( self.assertEqual(stats.moves, 0) -@requires_accelerator_dist_backend(["nccl", "xccl"]) +@requires_nccl() class TestSyncDecisionCrossRanks(MultiProcessTestCase): def setUp(self) -> None: super().setUp() @@ -2031,21 +2004,16 @@ def ranks(self) -> list[int]: @property def device(self) -> torch.device: - device_type = torch.accelerator.current_accelerator().type - return torch.device(f"{device_type}:{self.rank}") + return torch.device(f"cuda:{self.rank}") def _init_process_group(self) -> None: torch._inductor.config.triton.store_cubin = True torch._inductor.config.debug = True - torch.get_device_module(self.device).set_device(self.device) + torch.cuda.set_device(self.device) store = torch.distributed.FileStore(self.file_name, self.world_size) - backend = c10d.get_default_backend_for_device( - torch.accelerator.current_accelerator().type - ) - torch.distributed.init_process_group( - backend=backend, + backend="nccl", world_size=self.world_size, rank=self.rank, store=store, diff --git a/test/distributed/test_store.py b/test/distributed/test_store.py index e557a48359623..870805eec75e8 100644 --- a/test/distributed/test_store.py +++ b/test/distributed/test_store.py @@ -54,8 +54,6 @@ torch.backends.cuda.matmul.allow_tf32 = False -device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" - def gpus_for_rank(world_size): """Multigpu tests are designed to simulate the multi nodes with multi @@ -63,8 +61,8 @@ def gpus_for_rank(world_size): On a single node, all visible GPUs are evenly divided to subsets, each process only uses a subset. """ - visible_devices = list(range(torch.accelerator.device_count())) - gpus_per_process = torch.accelerator.device_count() // world_size + visible_devices = list(range(torch.cuda.device_count())) + gpus_per_process = torch.cuda.device_count() // world_size gpus_for_rank = [] for rank in range(world_size): gpus_for_rank.append( @@ -1176,8 +1174,8 @@ def listen() -> None: if __name__ == "__main__": - if device_type != "cpu": - assert not torch.get_device_module()._initialized, ( - "test_distributed must not have initialized {device_type} context on main process" - ) + assert not torch.cuda._initialized, ( + "test_distributed must not have initialized CUDA context on main process" + ) + run_tests() diff --git a/test/distributions/test_distributions.py b/test/distributions/test_distributions.py index aaae775f191cf..7cb8cc678136f 100644 --- a/test/distributions/test_distributions.py +++ b/test/distributions/test_distributions.py @@ -115,13 +115,10 @@ set_default_dtype, set_rng_seed, skipIfTorchDynamo, - TEST_XPU, TestCase, ) -device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" - # load_tests from torch.testing._internal.common_utils is used to automatically filter tests for # sharding on sandcastle. 
This line silences flake warnings load_tests = load_tests @@ -1791,21 +1788,18 @@ def test_negative_binomial_log_prob_vectorized_count(self): ).logpmf(sample) self.assertEqual(log_prob, expected, atol=1e-4, rtol=0) - @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "CUDA and XPU not found") + @unittest.skipIf(not TEST_CUDA, "CUDA not found") def test_zero_excluded_binomial(self): vals = Binomial( - total_count=torch.tensor(1.0).to(device_type), - probs=torch.tensor(0.9).to(device_type), + total_count=torch.tensor(1.0).cuda(), probs=torch.tensor(0.9).cuda() ).sample(torch.Size((100000000,))) self.assertTrue((vals >= 0).all()) vals = Binomial( - total_count=torch.tensor(1.0).to(device_type), - probs=torch.tensor(0.1).to(device_type), + total_count=torch.tensor(1.0).cuda(), probs=torch.tensor(0.1).cuda() ).sample(torch.Size((100000000,))) self.assertTrue((vals < 2).all()) vals = Binomial( - total_count=torch.tensor(1.0).to(device_type), - probs=torch.tensor(0.5).to(device_type), + total_count=torch.tensor(1.0).cuda(), probs=torch.tensor(0.5).cuda() ).sample(torch.Size((10000,))) # vals should be roughly half zeroes, half ones assert (vals == 0.0).sum() > 4000 @@ -2056,15 +2050,15 @@ def test_poisson_sample(self): ) torch.set_default_dtype(saved_dtype) - @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "CUDA and XPU not found") + @unittest.skipIf(not TEST_CUDA, "CUDA not found") @unittest.skipIf(not TEST_NUMPY, "Numpy not found") def test_poisson_gpu_sample(self): set_rng_seed(1) for rate in [0.12, 0.9, 4.0]: self._check_sampler_discrete( - Poisson(torch.tensor([rate]).to(device_type)), + Poisson(torch.tensor([rate]).cuda()), scipy.stats.poisson(rate), - f"Poisson(lambda={rate}, {device_type})", + f"Poisson(lambda={rate}, cuda)", failure_rate=1e-3, ) @@ -3496,13 +3490,13 @@ def ref_log_prob(idx, x, log_prob): self._check_log_prob(Gamma(alpha, beta), ref_log_prob) - @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "CUDA and XPU not found") + @unittest.skipIf(not TEST_CUDA, "CUDA not found") @unittest.skipIf(not TEST_NUMPY, "NumPy not found") def test_gamma_gpu_shape(self): - alpha = torch.randn(2, 3).to(device_type).exp().requires_grad_() - beta = torch.randn(2, 3).to(device_type).exp().requires_grad_() - alpha_1d = torch.randn(1).to(device_type).exp().requires_grad_() - beta_1d = torch.randn(1).to(device_type).exp().requires_grad_() + alpha = torch.randn(2, 3).cuda().exp().requires_grad_() + beta = torch.randn(2, 3).cuda().exp().requires_grad_() + alpha_1d = torch.randn(1).cuda().exp().requires_grad_() + beta_1d = torch.randn(1).cuda().exp().requires_grad_() self.assertEqual(Gamma(alpha, beta).sample().size(), (2, 3)) self.assertEqual(Gamma(alpha, beta).sample((5,)).size(), (5, 2, 3)) self.assertEqual(Gamma(alpha_1d, beta_1d).sample((1,)).size(), (1, 1)) @@ -3533,10 +3527,7 @@ def test_gamma_sample(self): def test_gamma_gpu_sample(self): set_rng_seed(0) for alpha, beta in product([0.1, 1.0, 5.0], [0.1, 1.0, 10.0]): - a, b = ( - torch.tensor([alpha]).to(device_type), - torch.tensor([beta]).to(device_type), - ) + a, b = torch.tensor([alpha]).cuda(), torch.tensor([beta]).cuda() self._check_sampler_sampler( Gamma(a, b), scipy.stats.gamma(alpha, scale=1.0 / beta), @@ -3982,11 +3973,11 @@ def test_beta_underflow(self): self.assertEqual(frac_zeros, 0.5, atol=0.05, rtol=0) self.assertEqual(frac_ones, 0.5, atol=0.05, rtol=0) - @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "CUDA and XPU not found") + @unittest.skipIf(not TEST_CUDA, "CUDA not found") def test_beta_underflow_gpu(self): set_rng_seed(1) 
num_samples = 50000 - conc = torch.tensor(1e-2, dtype=torch.float64).to(device_type) + conc = torch.tensor(1e-2, dtype=torch.float64).cuda() beta_samples = Beta(conc, conc).sample([num_samples]) self.assertEqual((beta_samples == 0).sum(), 0) self.assertEqual((beta_samples == 1).sum(), 0) diff --git a/test/inductor/test_snode_runtime.py b/test/inductor/test_snode_runtime.py index cee78592153db..c57393d993eab 100644 --- a/test/inductor/test_snode_runtime.py +++ b/test/inductor/test_snode_runtime.py @@ -258,6 +258,8 @@ def _verify_runtime_estimation(self, fn, inps): finally: dist.destroy_process_group() + # lack of profiler on XPU + @expectedFailureXPU def test_legacy_all_reduce(self): def fn(x): r = c10d.all_reduce(x, "sum", "", self.RANKS, self.WORLD_SIZE) @@ -266,6 +268,8 @@ def fn(x): inp = T(10, 10) self._verify_runtime_estimation(fn, (inp,)) + # lack of profiler on XPU + @expectedFailureXPU def test_legacy_all_reduce_coalesced(self): def fn(x): rs = c10d.all_reduce_coalesced(x, "sum", "", self.RANKS, self.WORLD_SIZE) @@ -274,6 +278,8 @@ def fn(x): inp = [T(10, 10), T(15, 15)] self._verify_runtime_estimation(fn, (inp,)) + # lack of profiler on XPU + @expectedFailureXPU def test_legacy_all_gather_into_tensor_coalesced(self): def fn(x): rs = c10d.all_gather_into_tensor_coalesced( @@ -287,6 +293,8 @@ def fn(x): inp = [T(10, 10), T(15, 15)] self._verify_runtime_estimation(fn, (inp,)) + # lack of profiler on XPU + @expectedFailureXPU def test_all_reduce(self): def fn(x): r = _c10d.all_reduce(x, "sum", "0") @@ -295,6 +303,8 @@ def fn(x): inp = T(10, 10) self._verify_runtime_estimation(fn, (inp,)) + # lack of profiler on XPU + @expectedFailureXPU def test_all_reduce_coalesced(self): def fn(x): rs = _c10d.all_reduce_coalesced(x, "sum", "0") @@ -303,6 +313,8 @@ def fn(x): inp = [T(10, 10), T(15, 15)] self._verify_runtime_estimation(fn, (inp,)) + # lack of profiler on XPU + @expectedFailureXPU def test_all_gather_into_tensor(self): def fn(x): rs = _c10d.all_gather_into_tensor( @@ -315,6 +327,8 @@ def fn(x): inp = T(10, 10) self._verify_runtime_estimation(fn, (inp,)) + # lack of profiler on XPU + @expectedFailureXPU def test_all_gather_into_tensor_coalesced(self): def fn(x): rs = _c10d.all_gather_into_tensor_coalesced( @@ -327,6 +341,8 @@ def fn(x): inp = [T(10, 10), T(15, 15)] self._verify_runtime_estimation(fn, (inp,)) + # lack of profiler on XPU + @expectedFailureXPU def test_reduce_scatter_tensor(self): def fn(x): rs = _c10d.reduce_scatter_tensor( @@ -340,6 +356,8 @@ def fn(x): inp = T(self.WORLD_SIZE, 10) self._verify_runtime_estimation(fn, (inp,)) + # lack of profiler on XPU + @expectedFailureXPU def test_reduce_scatter_tensor_coalesced(self): def fn(x): rs = _c10d.reduce_scatter_tensor_coalesced( diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 498cc50eb9cf5..29609404df09b 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -350,12 +350,10 @@ def register_backend( # assume default devices "cpu" and "cuda", but warn warnings.warn( f"Device capability of {name} unspecified, assuming `cpu` and " - "`cuda` or `xpu`. Please specify it via the `devices` argument of " + "`cuda`. Please specify it via the `devices` argument of " "`register_backend`." ) - Backend.backend_capability[name.lower()] = ( - ["cpu", "cuda", "xpu"] if torch.xpu.is_available() else ["cpu", "cuda"] - ) + Backend.backend_capability[name.lower()] = ["cpu", "cuda"] elif isinstance(devices, str): # Single device string specified. 
Simply convert to list. Backend.backend_capability[name.lower()] = [devices] diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py index 0e74d9049b94b..c1f75697fe889 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py @@ -338,17 +338,26 @@ def requires_gloo(): def requires_nccl_version(version, msg): - if not TEST_CUDA: - return lambda f: f - if not c10d.is_nccl_available(): - return skip_but_pass_in_sandcastle( - "c10d was not compiled with the NCCL backend", - ) + if TEST_CUDA: + if not c10d.is_nccl_available(): + return skip_but_pass_in_sandcastle( + "c10d was not compiled with the NCCL backend", + ) + else: + return skip_but_pass_in_sandcastle_if( + torch.cuda.nccl.version() < version, + f"Requires NCCL version greater than or equal to: {version}, found: {torch.cuda.nccl.version()}, reason: {msg}", + ) else: - return skip_but_pass_in_sandcastle_if( - torch.cuda.nccl.version() < version, - f"Requires NCCL version greater than or equal to: {version}, found: {torch.cuda.nccl.version()}, reason: {msg}", - ) + + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + return func(*args, **kwargs) + + return wrapper + + return decorator def requires_nccl(): diff --git a/torch/testing/_internal/distributed/fake_pg.py b/torch/testing/_internal/distributed/fake_pg.py index e160f2fe50611..0a2814c246459 100644 --- a/torch/testing/_internal/distributed/fake_pg.py +++ b/torch/testing/_internal/distributed/fake_pg.py @@ -28,5 +28,5 @@ def _create_fake_pg(common_opts, backend_opts): dist.Backend.register_backend( - "fake", _create_fake_pg, extended_api=True, devices=["cpu", "cuda", "hpu", "xpu"] + "fake", _create_fake_pg, extended_api=True, devices=["cpu", "cuda", "hpu"] ) From ff6870d13435e499f16f639cb10ff80d1c8e5bb4 Mon Sep 17 00:00:00 2001 From: David Berard Date: Thu, 11 Sep 2025 13:14:17 +0000 Subject: [PATCH 131/693] [BE][flex attention] compute RMSE in float64 (#162088) I saw a failure where the reference error was 0.0, and the compiled error was 0.035. Although the failure still occurs with or without this change, it was confusing to see RMSE of 0.0. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162088 Approved by: https://github.com/drisspg --- test/inductor/test_flex_attention.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/inductor/test_flex_attention.py b/test/inductor/test_flex_attention.py index b8d8dc815eb9a..21d67ffc0ab33 100644 --- a/test/inductor/test_flex_attention.py +++ b/test/inductor/test_flex_attention.py @@ -146,6 +146,8 @@ def rmse(ref, res): """ Calculate root mean squared error """ + ref = ref.to(torch.float64) + res = res.to(torch.float64) return torch.sqrt(torch.mean(torch.square(ref - res))) From a956066b4e7f8adf1bd0ad13caa8ba71ab2bc577 Mon Sep 17 00:00:00 2001 From: Ramya Ramineni <62723901+rraminen@users.noreply.github.com> Date: Fri, 12 Sep 2025 00:13:26 +0000 Subject: [PATCH 132/693] [ROCm] Define uint32 t when ROCM_VERSION >= 70000 (#160587) This PR fixes the errors like below: ``` [rank3]: RuntimeError: The following operation failed in the TorchScript interpreter. [rank3]: Traceback of TorchScript (most recent call last): [rank3]: RuntimeError: /tmp/comgr-28f951/input/CompileSourceACC062:67:7: error: unknown type name 'uint32_t'; did you mean '__hip_internal::uint32_t'? 
[rank3]: 67 | uint32_t int32; [rank3]: | ^~~~~~~~ [rank3]: | __hip_internal::uint32_t ``` Earlier uint32_t was defined in HIP headers in std namespace. Now it is moved to __hip_internal namespace in hip headers. This change is made in ROCm 7.0. Pull Request resolved: https://github.com/pytorch/pytorch/pull/160587 Approved by: https://github.com/jeffdaily --- .../jit/codegen/fuser/cuda/resource_strings.h | 71 ++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/torch/csrc/jit/codegen/fuser/cuda/resource_strings.h b/torch/csrc/jit/codegen/fuser/cuda/resource_strings.h index 9728d27d4d79b..0ac2c79d1e98a 100644 --- a/torch/csrc/jit/codegen/fuser/cuda/resource_strings.h +++ b/torch/csrc/jit/codegen/fuser/cuda/resource_strings.h @@ -260,7 +260,7 @@ typedef __half half; )"; #endif -#if defined(USE_ROCM) +#if defined(USE_ROCM) && ROCM_VERSION < 70000 constexpr auto bfloat16_support_literal = R"( #ifndef __align__ @@ -317,6 +317,75 @@ __device__ __nv_bfloat16 __float2bfloat16(const float a) { return val; } +__device__ float __bfloat162float(const __nv_bfloat16 a) { + union + { + uint32_t int32; + float fp32; + } u = {uint32_t(a.__x) << 16}; + return u.fp32; +} +#endif /* defined(__cplusplus) */ +)"; +#elif defined(USE_ROCM) && ROCM_VERSION >= 70000 +constexpr auto bfloat16_support_literal = + R"( +#ifndef __align__ +#define __align__(x) __attribute__((aligned(x))) +#endif + +typedef unsigned int uint32_t; + +typedef struct __align__(2) { + unsigned short x; +} +__nv_bfloat16_raw; + +#if defined(__cplusplus) +struct __align__(2) __nv_bfloat16 { + __host__ __device__ __nv_bfloat16() {} + + __host__ __device__ __nv_bfloat16& operator=(const __nv_bfloat16_raw& hr) { + __x = hr.x; + return *this; + } + + unsigned short __x; +}; + +__device__ unsigned short __internal_float2bfloat16( + const float f, + unsigned int& sign, + unsigned int& remainder) { + unsigned int x; + + x = __float_as_uint(f); + + if ((x & 0x7fffffffU) > 0x7f800000U) { + sign = 0U; + remainder = 0U; + return static_cast(0x7fffU); + } + sign = x >> 31; + remainder = x << 16; + return static_cast(x >> 16); +} + +/* Definitions of intrinsics */ +__device__ __nv_bfloat16 __float2bfloat16(const float a) { + __nv_bfloat16 val; + __nv_bfloat16_raw r; + unsigned int sign; + unsigned int remainder; + r.x = __internal_float2bfloat16(a, sign, remainder); + if ((remainder > 0x80000000U) || + ((remainder == 0x80000000U) && ((r.x & 0x1U) != 0U))) { + r.x++; + } + val = r; + return val; +} + __device__ float __bfloat162float(const __nv_bfloat16 a) { union { From 84d8ec73f11059b1e4f818cdf1d2eee6ffde7e87 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 9 Sep 2025 08:51:14 -0700 Subject: [PATCH 133/693] [CD] Build Mac wheels using `setup-python` action (#162136) Biggest difference between both conda and homebrew CPython builds and one from python.org, is that later are universal binaries and they are always trying to build universal extension... 
Workaround lots of universal binary build attempts by explicitly specifying both `_PYTHON_PLATFORM` and `--plat-name` as well as `ARCH_FLAGS` Suppressed actionlint warning on use of `freethreaded` flag which is document in https://github.com/actions/setup-python/tree/v5 TODO: Remove lots of temporary workarounds when `3.14` is out in October 2025 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162136 Approved by: https://github.com/atalman, https://github.com/huydhn ghstack dependencies: #162297, #162265 --- .ci/wheel/build_wheel.sh | 30 +- .../macos_binary_build_workflow.yml.j2 | 52 +-- ...-arm64-binary-libtorch-release-nightly.yml | 18 +- ...rated-macos-arm64-binary-wheel-nightly.yml | 336 ++++-------------- tools/linter/adapters/actionlint_linter.py | 2 + 5 files changed, 94 insertions(+), 344 deletions(-) diff --git a/.ci/wheel/build_wheel.sh b/.ci/wheel/build_wheel.sh index e63a68e4f1934..2d5f4d30b4c82 100755 --- a/.ci/wheel/build_wheel.sh +++ b/.ci/wheel/build_wheel.sh @@ -85,7 +85,7 @@ mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true # Create an isolated directory to store this builds pytorch checkout and conda # installation if [[ -z "$MAC_PACKAGE_WORK_DIR" ]]; then - MAC_PACKAGE_WORK_DIR="$(pwd)/tmp_wheel_conda_${DESIRED_PYTHON}_$(date +%H%M%S)" + MAC_PACKAGE_WORK_DIR="$(pwd)/tmp_wheel_${DESIRED_PYTHON}_$(date +%H%M%S)" fi mkdir -p "$MAC_PACKAGE_WORK_DIR" || true if [[ -n ${GITHUB_ACTIONS} ]]; then @@ -96,11 +96,11 @@ fi whl_tmp_dir="${MAC_PACKAGE_WORK_DIR}/dist" mkdir -p "$whl_tmp_dir" -mac_version='macosx_11_0_arm64' +mac_version='macosx-11_0-arm64' libtorch_arch='arm64' # Create a consistent wheel package name to rename the wheel to -wheel_filename_new="${TORCH_PACKAGE_NAME}-${build_version}${build_number_prefix}-cp${python_nodot}-none-${mac_version}.whl" +wheel_filename_new="${TORCH_PACKAGE_NAME}-${build_version}${build_number_prefix}-cp${python_nodot}-none-${mac_version//[-,]/_}.whl" ########################################################### @@ -125,7 +125,6 @@ popd export TH_BINARY_BUILD=1 export INSTALL_TEST=0 # dont install test binaries into site-packages export MACOSX_DEPLOYMENT_TARGET=11.0 -export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} EXTRA_CONDA_INSTALL_FLAGS="" CONDA_ENV_CREATE_FLAGS="" @@ -133,25 +132,19 @@ RENAME_WHEEL=true case $desired_python in 3.14t) echo "Using 3.14 deps" + mac_version='macosx-11.0-arm64' NUMPY_PINNED_VERSION="==2.1.0" - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" RENAME_WHEEL=false ;; 3.14) echo "Using 3.14t deps" + mac_version='macosx-11.0-arm64' NUMPY_PINNED_VERSION="==2.1.0" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" RENAME_WHEEL=false ;; 3.13t) echo "Using 3.13 deps" NUMPY_PINNED_VERSION="==2.1.0" - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - desired_python="3.13" RENAME_WHEEL=false ;; 3.13) @@ -176,17 +169,12 @@ case $desired_python in ;; esac -# Install into a fresh env -tmp_env_name="wheel_py$python_nodot" -conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} -source activate "$tmp_env_name" - PINNED_PACKAGES=( "numpy${NUMPY_PINNED_VERSION}" ) -retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements-build.txt" -pip install requests ninja typing-extensions -retry pip install -r 
"${pytorch_rootdir}/requirements.txt" || true +python -mvenv ~/${desired_python}-build +source ~/${desired_python}-build/bin/activate +retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt" retry brew install libomp # For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule @@ -199,7 +187,7 @@ export BUILD_TEST=OFF pushd "$pytorch_rootdir" echo "Calling setup.py bdist_wheel at $(date)" -python setup.py bdist_wheel -d "$whl_tmp_dir" --plat-name ${mac_version} +_PYTHON_HOST_PLATFORM=${mac_version} ARCHFLAGS="-arch arm64" python setup.py bdist_wheel -d "$whl_tmp_dir" --plat-name "${mac_version//[-.]/_}" echo "Finished setup.py bdist_wheel at $(date)" diff --git a/.github/templates/macos_binary_build_workflow.yml.j2 b/.github/templates/macos_binary_build_workflow.yml.j2 index f4b2a66d2acda..7f307447c3576 100644 --- a/.github/templates/macos_binary_build_workflow.yml.j2 +++ b/.github/templates/macos_binary_build_workflow.yml.j2 @@ -22,6 +22,16 @@ name: !{{ build_environment }} echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" {%- endmacro %} +{%- macro setup_python(py_ver) -%} + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "!{{ (py_ver.strip('t') + '.4') if '3.14' not in py_ver else '3.14.0-rc.2' }}" + freethreaded: !{{ "true" if py_ver.endswith('t') else "false" }} +{%- endmacro %} + on: # TODO: Migrate to new ciflow trigger, reference https://github.com/pytorch/pytorch/pull/70321 push: @@ -61,23 +71,13 @@ jobs: {%- endif %} steps: !{{ set_runner_specific_vars() }} - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + !{{ setup_python(config.get("python_version", "3.10")) }} !{{ common.checkout(deep_clone=False, directory="pytorch") }} - name: Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -94,8 +94,6 @@ jobs: {%- if config["package_type"] == "wheel" %} - name: Test PyTorch wheel run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -106,33 +104,9 @@ jobs: SMOKE_TEST_PARAMS="" - EXTRA_CONDA_INSTALL_FLAGS="" - CONDA_ENV_CREATE_FLAGS="" - # shellcheck disable=SC2153 - case $DESIRED_PYTHON in - 3.14t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.14) - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.13t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - 
desired_python="3.13" - ;; - *) - # shellcheck disable=SC2153 - desired_python=${DESIRED_PYTHON} - ;; - esac - # shellcheck disable=SC2086 - conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} - conda activate test_conda_env + python -mvenv test_venv + source test_venv/bin/activate pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v # shellcheck disable=SC2086 diff --git a/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml b/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml index 500f8fa07af6b..cd912650eb17d 100644 --- a/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml +++ b/.github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml @@ -60,13 +60,13 @@ jobs: echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" # shellcheck disable=SC2129 echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.10.4" + freethreaded: false - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -81,13 +81,9 @@ jobs: working-directory: pytorch - name: Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" diff --git a/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml b/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml index 6aee57b503aa2..8522d2d369930 100644 --- a/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml +++ b/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml @@ -56,13 +56,13 @@ jobs: echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" # shellcheck disable=SC2129 echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.10.4" + freethreaded: false - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -77,13 +77,9 @@ jobs: working-directory: pytorch - name: 
Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -99,8 +95,6 @@ jobs: "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" - name: Test PyTorch wheel run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -111,33 +105,9 @@ jobs: SMOKE_TEST_PARAMS="" - EXTRA_CONDA_INSTALL_FLAGS="" - CONDA_ENV_CREATE_FLAGS="" - # shellcheck disable=SC2153 - case $DESIRED_PYTHON in - 3.14t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.14) - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.13t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - desired_python="3.13" - ;; - *) - # shellcheck disable=SC2153 - desired_python=${DESIRED_PYTHON} - ;; - esac - # shellcheck disable=SC2086 - conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} - conda activate test_conda_env + python -mvenv test_venv + source test_venv/bin/activate pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v # shellcheck disable=SC2086 @@ -196,13 +166,13 @@ jobs: echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" # shellcheck disable=SC2129 echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.11.4" + freethreaded: false - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -217,13 +187,9 @@ jobs: working-directory: pytorch - name: Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -239,8 +205,6 @@ jobs: "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" - name: Test PyTorch wheel run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -251,33 +215,9 @@ jobs: SMOKE_TEST_PARAMS="" - EXTRA_CONDA_INSTALL_FLAGS="" - CONDA_ENV_CREATE_FLAGS="" - # shellcheck disable=SC2153 - case $DESIRED_PYTHON in - 3.14t) - 
CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.14) - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.13t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - desired_python="3.13" - ;; - *) - # shellcheck disable=SC2153 - desired_python=${DESIRED_PYTHON} - ;; - esac - # shellcheck disable=SC2086 - conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} - conda activate test_conda_env + python -mvenv test_venv + source test_venv/bin/activate pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v # shellcheck disable=SC2086 @@ -336,13 +276,13 @@ jobs: echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" # shellcheck disable=SC2129 echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.12.4" + freethreaded: false - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -357,13 +297,9 @@ jobs: working-directory: pytorch - name: Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -379,8 +315,6 @@ jobs: "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" - name: Test PyTorch wheel run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -391,33 +325,9 @@ jobs: SMOKE_TEST_PARAMS="" - EXTRA_CONDA_INSTALL_FLAGS="" - CONDA_ENV_CREATE_FLAGS="" - # shellcheck disable=SC2153 - case $DESIRED_PYTHON in - 3.14t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.14) - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.13t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - desired_python="3.13" - ;; - *) - # shellcheck disable=SC2153 - desired_python=${DESIRED_PYTHON} - ;; - esac - # shellcheck disable=SC2086 - conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} - conda activate test_conda_env + python -mvenv test_venv + source test_venv/bin/activate pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v # shellcheck disable=SC2086 @@ -476,13 +386,13 @@ jobs: echo 
"PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" # shellcheck disable=SC2129 echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.13.4" + freethreaded: false - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -497,13 +407,9 @@ jobs: working-directory: pytorch - name: Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -519,8 +425,6 @@ jobs: "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" - name: Test PyTorch wheel run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -531,33 +435,9 @@ jobs: SMOKE_TEST_PARAMS="" - EXTRA_CONDA_INSTALL_FLAGS="" - CONDA_ENV_CREATE_FLAGS="" - # shellcheck disable=SC2153 - case $DESIRED_PYTHON in - 3.14t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.14) - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.13t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - desired_python="3.13" - ;; - *) - # shellcheck disable=SC2153 - desired_python=${DESIRED_PYTHON} - ;; - esac - # shellcheck disable=SC2086 - conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} - conda activate test_conda_env + python -mvenv test_venv + source test_venv/bin/activate pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v # shellcheck disable=SC2086 @@ -616,13 +496,13 @@ jobs: echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" # shellcheck disable=SC2129 echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.13.4" + freethreaded: true - name: 
Checkout PyTorch uses: actions/checkout@v4 with: @@ -637,13 +517,9 @@ jobs: working-directory: pytorch - name: Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -659,8 +535,6 @@ jobs: "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" - name: Test PyTorch wheel run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -671,33 +545,9 @@ jobs: SMOKE_TEST_PARAMS="" - EXTRA_CONDA_INSTALL_FLAGS="" - CONDA_ENV_CREATE_FLAGS="" - # shellcheck disable=SC2153 - case $DESIRED_PYTHON in - 3.14t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.14) - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.13t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - desired_python="3.13" - ;; - *) - # shellcheck disable=SC2153 - desired_python=${DESIRED_PYTHON} - ;; - esac - # shellcheck disable=SC2086 - conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} - conda activate test_conda_env + python -mvenv test_venv + source test_venv/bin/activate pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v # shellcheck disable=SC2086 @@ -756,13 +606,13 @@ jobs: echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" # shellcheck disable=SC2129 echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.14.0-rc.2" + freethreaded: false - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -777,13 +627,9 @@ jobs: working-directory: pytorch - name: Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -799,8 +645,6 @@ jobs: "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" - name: Test PyTorch wheel run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -811,33 +655,9 @@ jobs: SMOKE_TEST_PARAMS="" - EXTRA_CONDA_INSTALL_FLAGS="" - 
CONDA_ENV_CREATE_FLAGS="" - # shellcheck disable=SC2153 - case $DESIRED_PYTHON in - 3.14t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.14) - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.13t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - desired_python="3.13" - ;; - *) - # shellcheck disable=SC2153 - desired_python=${DESIRED_PYTHON} - ;; - esac - # shellcheck disable=SC2086 - conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} - conda activate test_conda_env + python -mvenv test_venv + source test_venv/bin/activate pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v # shellcheck disable=SC2086 @@ -896,13 +716,13 @@ jobs: echo "PYTORCH_FINAL_PACKAGE_DIR=${RUNNER_TEMP}/artifacts" >> "${GITHUB_ENV}" # shellcheck disable=SC2129 echo "MAC_PACKAGE_WORK_DIR=${RUNNER_TEMP}" >> "${GITHUB_ENV}" - - name: Install conda and dependencies - run: | - # Install conda, setup-miniconda messes with the path that messes with the ruby stuff we do later on - curl --retry 3 --retry-all-errors -o "${RUNNER_TEMP}/conda.sh" "https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-MacOSX-$(uname -m).sh" - chmod +x "${RUNNER_TEMP}/conda.sh" - /bin/bash "${RUNNER_TEMP}/conda.sh" -b -p "${RUNNER_TEMP}/anaconda" - echo "${RUNNER_TEMP}/anaconda/bin" >> "${GITHUB_PATH}" + - name: Setup Python + uses: actions/setup-python@v6 + with: + # TODO: Removeme once 3.14 is out + # .4 version is min minor for 3.10, and also no-gil version of 3.13 needs at least 3.13.3 + python-version: "3.14.0-rc.2" + freethreaded: true - name: Checkout PyTorch uses: actions/checkout@v4 with: @@ -917,13 +737,9 @@ jobs: working-directory: pytorch - name: Populate binary env run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" "${PYTORCH_ROOT}/.circleci/scripts/binary_populate_env.sh" - name: Build PyTorch binary run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -939,8 +755,6 @@ jobs: "${PYTORCH_ROOT}/.ci/wheel/build_wheel.sh" - name: Test PyTorch wheel run: | - # shellcheck disable=SC1091 - source "${RUNNER_TEMP}/anaconda/bin/activate" set -eux -o pipefail # shellcheck disable=SC1090 source "${BINARY_ENV_FILE:-/Users/distiller/project/env}" @@ -951,33 +765,9 @@ jobs: SMOKE_TEST_PARAMS="" - EXTRA_CONDA_INSTALL_FLAGS="" - CONDA_ENV_CREATE_FLAGS="" - # shellcheck disable=SC2153 - case $DESIRED_PYTHON in - 3.14t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.14) - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" - desired_python="3.14.0rc1" - ;; - 3.13t) - CONDA_ENV_CREATE_FLAGS="python-freethreading" - EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" - desired_python="3.13" - ;; - *) - # shellcheck disable=SC2153 - desired_python=${DESIRED_PYTHON} - ;; - esac - # shellcheck disable=SC2086 - conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} - conda activate test_conda_env + python -mvenv test_venv + source test_venv/bin/activate pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v # 
shellcheck disable=SC2086 diff --git a/tools/linter/adapters/actionlint_linter.py b/tools/linter/adapters/actionlint_linter.py index bebb95c499f8a..019f0fe896bcd 100644 --- a/tools/linter/adapters/actionlint_linter.py +++ b/tools/linter/adapters/actionlint_linter.py @@ -73,6 +73,8 @@ def check_file( binary, "-ignore", '"runs-on" section must be sequence node but got mapping node with "!!map" tag', + "-ignore", + 'input "freethreaded" is not defined in action "actions/setup-python@v', file, ] ) From 563921619b3e820b170475b9278ff94ee6e1a32c Mon Sep 17 00:00:00 2001 From: Yuxingwang-intel Date: Fri, 12 Sep 2025 00:17:05 +0000 Subject: [PATCH 134/693] Fix the regression issue caused by non-arrch64 platforms not hitting the MKLDNN path. (#162168) This issue was introduced by the commit in issue #161065. Added an extra check to provide a proper path for other platforms. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162168 Approved by: https://github.com/mingfeima, https://github.com/malfet Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com> --- aten/src/ATen/native/LinearAlgebra.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/aten/src/ATen/native/LinearAlgebra.cpp b/aten/src/ATen/native/LinearAlgebra.cpp index b62c584641dba..616e6ec60e13d 100644 --- a/aten/src/ATen/native/LinearAlgebra.cpp +++ b/aten/src/ATen/native/LinearAlgebra.cpp @@ -1360,7 +1360,8 @@ Tensor outer(const Tensor& self, const Tensor& vec2) { #endif -#if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED() +#if !defined(__aarch64__) || AT_MKLDNN_ACL_ENABLED() +// Used by default on x86 platforms and on AArch64+ACL static inline int64_t get_mkldnn_matmul_min_dim() { static auto value = [&] { const int64_t default_min_dim = [&] { @@ -1395,8 +1396,6 @@ static inline bool apply_mkldnn_matmul_heur(int64_t m, int64_t k, int64_t n) { return at::globalContext().userEnabledMkldnn() && m > min_dim && k > min_dim && n > min_dim && m * k * n > min_size; } #endif - - static void addmm_impl_cpu_( Tensor &result, const Tensor &self, Tensor m1, Tensor m2, const Scalar& beta, const Scalar& alpha) { TORCH_INTERNAL_ASSERT(self.dim() == 2 && m1.dim() == 2 && m2.dim() == 2); @@ -1772,8 +1771,8 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens return (strides[2] == 1 && (sizes[1] == 1 || strides[1] >= sizes[2])) || (strides[1] == 1 && (sizes[2] == 1 || strides[2] >= sizes[1])); }; - -#if defined(__aarch64__) && AT_MKLDNN_ACL_ENABLED() +#if !defined(__aarch64__) || AT_MKLDNN_ACL_ENABLED() + // Always apply mkldnn heuristic on x86 platform, but on ARM only if compiled with ACL bool apply_heur = apply_mkldnn_matmul_heur(batch1.sizes()[1], batch1.sizes()[2], batch2.sizes()[2]); if (apply_heur && use_mkldnn_matmul(batch1, batch2, self_or_result)) { try { @@ -1785,7 +1784,6 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens } } #endif - if (contraction_size * res_rows * res_cols < 400) { if (is_bmm_out) { AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(kBFloat16, kHalf, batch1.scalar_type(), "bmm", [&] { From 4a757e1e1734698a22170ac584c54d5a0ba9e1d6 Mon Sep 17 00:00:00 2001 From: Aaryaman Vasishta Date: Fri, 12 Sep 2025 00:18:43 +0000 Subject: [PATCH 135/693] [ROCm] Support torch.cuda._compile_kernel (#162510) Supports `torch.cuda._compile_kernel` on ROCm. Related to https://github.com/pytorch/pytorch/pull/151484 Tested on Windows with gfx1201. Testing on Linux pending. 
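For context, a minimal usage sketch of the API this patch enables on ROCm (illustrative only; it mirrors the vector-addition pattern already exercised by `test_compile_kernel` in the diff below, and assumes a GPU build where `torch.cuda._compile_kernel` is available):

```python
import torch

# The same CUDA-style source is expected to compile under hipRTC on ROCm builds.
kernel_source = """
extern "C" __global__ void add_tensors(const float* a, const float* b, float* c, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        c[i] = a[i] + b[i];
}
"""

add_kernel = torch.cuda._compile_kernel(kernel_source, "add_tensors")

n = 1024
a = torch.rand(n, device="cuda")  # "cuda" also targets the ROCm device
b = torch.rand(n, device="cuda")
c = torch.empty_like(a)

threads = 256
blocks = (n + threads - 1) // threads
# Launch keywords (grid/block/args) follow the existing _CudaKernel.__call__ helper.
add_kernel(grid=(blocks, 1, 1), block=(threads, 1, 1), args=[a, b, c, n])
torch.testing.assert_close(c, a + b)
```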
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162510 Approved by: https://github.com/mycpuorg, https://github.com/msaroufim --- test/test_cuda.py | 23 +++++---- torch/cuda/_utils.py | 108 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 102 insertions(+), 29 deletions(-) diff --git a/test/test_cuda.py b/test/test_cuda.py index d6ce00d9e8db4..b5db4bac59f9a 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -6734,7 +6734,6 @@ def test_cuda_autocast_deprecated_warning(self): os.environ.get("USE_LEGACY_DRIVER", None) == "1", "Doesn't work with older driver" ) class TestCompileKernel(TestCase): - @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel(self): # Simple vector addition kernel @@ -6844,7 +6843,6 @@ def test_compile_kernel(self): with self.assertRaises(RuntimeError): _compile_kernel(invalid_kernel_source, "invalid_kernel") - @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel_large_shared_memory(self): kernel_source = """ @@ -6902,14 +6900,20 @@ def test_compile_kernel_large_shared_memory(self): self.assertEqual(output_data.cpu(), expected) # Test error handling with more than supported shared memory size - max_smem = get_device_properties().shared_memory_per_block_optin + if torch.version.hip: + max_smem = ( + 65536 + if get_device_properties().gcnArchName not in ["gfx950"] + else 160 * 1024 + ) + else: + max_smem = get_device_properties().shared_memory_per_block_optin excessive_shared_mem = max_smem * 2 with self.assertRaises(RuntimeError): kernel.set_shared_memory_config(excessive_shared_mem) @tf32_on_and_off(0.005) - @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel_advanced(self): # Test matrix multiplication @@ -6960,7 +6964,10 @@ def test_compile_kernel_advanced(self): # Test with different compute capability if specified device_props = torch.cuda.get_device_properties(torch.cuda.current_device()) - compute_cap = f"{device_props.major}{device_props.minor}" + if not torch.version.hip: + compute_cap = f"{device_props.major}{device_props.minor}" + else: + compute_cap = f"{device_props.gcnArchName}" # Recompile with explicit compute capability matmul_kernel_explicit = _compile_kernel( @@ -6979,7 +6986,6 @@ def test_compile_kernel_advanced(self): # Verify results self.assertEqual(C_explicit, expected) - @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel_as_custom_op(self): # Define a simple vector addition kernel @@ -7039,7 +7045,6 @@ def _(a, b): expected = a + b torch.testing.assert_close(result, expected, rtol=1e-5, atol=1e-5) - @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel_custom_op_validation(self): kernel_source = """ @@ -7087,7 +7092,6 @@ def _(input_tensor, scalar): expected = input_data + scalar_val torch.testing.assert_close(result, expected, rtol=1e-5, atol=1e-5) - @unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel_double_precision(self): """Test that Python floats are correctly handled as doubles in kernels.""" @@ -7122,12 +7126,13 @@ def test_compile_kernel_double_precision(self): ) torch.testing.assert_close(output, expected, rtol=1e-14, atol=1e-14) - 
@unittest.skipIf(TEST_WITH_ROCM, "ROCM does not support nvrtc") @unittest.skipIf(not TEST_CUDA, "No CUDA") def test_compile_kernel_cuda_headers(self): """Test that kernels can include and use CUDA headers like cuda_fp16.h.""" kernel_source = """ + #ifndef __HIPCC__ #include + #endif extern "C" __global__ void half_precision_kernel(__half* output, double input_value, int n) { diff --git a/torch/cuda/_utils.py b/torch/cuda/_utils.py index cc2411f52b7af..dc269fa629aaf 100644 --- a/torch/cuda/_utils.py +++ b/torch/cuda/_utils.py @@ -8,20 +8,40 @@ from torch._utils import _get_device_index as _torch_get_device_index -# Load CUDA driver and NVRTC -def _get_cuda_library() -> ctypes.CDLL: +def _get_hip_runtime_library() -> ctypes.CDLL: + if sys.platform == "win32": + lib = ctypes.CDLL(f"amdhip64_{torch.version.hip[0]}.dll") + else: # Unix-based systems + lib = ctypes.CDLL("libamdhip64.so") + lib.cuGetErrorString = lib.hipGetErrorString # type: ignore[attr-defined] + lib.cuModuleLoadData = lib.hipModuleLoadData # type: ignore[attr-defined] + lib.cuModuleGetFunction = lib.hipModuleGetFunction # type: ignore[attr-defined] + lib.cuLaunchKernel = lib.hipModuleLaunchKernel # type: ignore[attr-defined] + lib.cuFuncSetAttribute = lib.hipFuncSetAttribute # type: ignore[attr-defined] + return lib + + +def _get_cuda_runtime_library() -> ctypes.CDLL: if sys.platform == "win32": return ctypes.CDLL("nvcuda.dll") else: # Unix-based systems return ctypes.CDLL("libcuda.so.1") +# Load GPU driver runtime +def _get_gpu_runtime_library() -> ctypes.CDLL: + if torch.version.hip: + return _get_hip_runtime_library() + else: + return _get_cuda_runtime_library() + + # Helper: check CUDA errors def _check_cuda(result: int) -> None: if result == 0: return err_str = ctypes.c_char_p() - libcuda = _get_cuda_library() # Get reference to CUDA library + libcuda = _get_gpu_runtime_library() # Get reference to CUDA library libcuda.cuGetErrorString(result, ctypes.byref(err_str)) error_message = ( err_str.value.decode() if err_str.value is not None else "Unknown CUDA error" @@ -29,23 +49,49 @@ def _check_cuda(result: int) -> None: raise RuntimeError(f"CUDA error: {error_message}") +def _get_hiprtc_library() -> ctypes.CDLL: + if sys.platform == "win32": + version_str = "".join(["0", torch.version.hip[0], "0", torch.version.hip[2]]) + lib = ctypes.CDLL(f"hiprtc{version_str}.dll") + else: + lib = ctypes.CDLL("libhiprtc.so") + + # Provide aliases for HIP RTC functions to match NVRTC API + lib.nvrtcGetErrorString = lib.hiprtcGetErrorString # type: ignore[attr-defined] + lib.nvrtcCreateProgram = lib.hiprtcCreateProgram # type: ignore[attr-defined] + lib.nvrtcDestroyProgram = lib.hiprtcDestroyProgram # type: ignore[attr-defined] + lib.nvrtcCompileProgram = lib.hiprtcCompileProgram # type: ignore[attr-defined] + lib.nvrtcGetPTXSize = lib.hiprtcGetCodeSize # type: ignore[attr-defined] + lib.nvrtcGetPTX = lib.hiprtcGetCode # type: ignore[attr-defined] + lib.nvrtcGetProgramLogSize = lib.hiprtcGetProgramLogSize # type: ignore[attr-defined] + lib.nvrtcGetProgramLog = lib.hiprtcGetProgramLog # type: ignore[attr-defined] + return lib + + def _get_nvrtc_library() -> ctypes.CDLL: - # Since PyTorch already loads NVRTC, we can use the system library - # which should be compatible with PyTorch's version if sys.platform == "win32": return ctypes.CDLL("nvrtc64_120_0.dll") else: return ctypes.CDLL("libnvrtc.so") -def _get_nvrtc_compatible_flags() -> list[str]: +def _get_gpu_rtc_library() -> ctypes.CDLL: + # Since PyTorch already loads the GPU RTC library, we 
can use the system library + # which should be compatible with PyTorch's version + if torch.version.hip: + return _get_hiprtc_library() + else: + return _get_nvrtc_library() + + +def _get_gpu_rtc_compatible_flags() -> list[str]: """ - Get NVCC flags that are compatible with NVRTC compilation. + Get HIPCC/NVCC flags that are compatible with NVRTC compilation. Returns: - List of NVCC flags that can be safely used with NVRTC. + List of HIPCC/NVCC flags that can be safely used with NVRTC. """ - from torch.utils.cpp_extension import COMMON_NVCC_FLAGS + from torch.utils.cpp_extension import COMMON_HIPCC_FLAGS, COMMON_NVCC_FLAGS nvrtc_unsupported_flags = { "--expt-relaxed-constexpr", @@ -56,6 +102,9 @@ def _get_nvrtc_compatible_flags() -> list[str]: flag for flag in COMMON_NVCC_FLAGS if flag not in nvrtc_unsupported_flags ] + if torch.version.hip: + compatible_flags.extend(COMMON_HIPCC_FLAGS) + return compatible_flags @@ -86,7 +135,7 @@ def _nvrtc_compile( import torch.cuda # Load NVRTC library - libnvrtc = _get_nvrtc_library() + libnvrtc = _get_gpu_rtc_library() # NVRTC constants NVRTC_SUCCESS = 0 @@ -119,11 +168,17 @@ def check_nvrtc(result: int) -> None: # Get compute capability if not provided if compute_capability is None: props = torch.cuda.get_device_properties(torch.cuda.current_device()) - compute_capability = f"{props.major}{props.minor}" + if torch.version.hip: + compute_capability = f"{props.gcnArchName}" + else: + compute_capability = f"{props.major}{props.minor}" # Prepare compilation options options = [] - options.append(f"--gpu-architecture=sm_{compute_capability}".encode()) + if torch.version.hip: + options.append(f"--offload-arch={compute_capability}".encode()) + else: + options.append(f"--gpu-architecture=sm_{compute_capability}".encode()) # Auto-detect and add CUDA include paths from torch.utils.cpp_extension import include_paths @@ -142,7 +197,7 @@ def check_nvrtc(result: int) -> None: for option in nvcc_options: options.append(option.encode("utf-8")) - nvrtc_compatible_flags = _get_nvrtc_compatible_flags() + nvrtc_compatible_flags = _get_gpu_rtc_compatible_flags() options.extend([flag.encode("utf-8") for flag in nvrtc_compatible_flags]) # Convert options to C array @@ -181,7 +236,10 @@ def check_nvrtc(result: int) -> None: check_nvrtc(libnvrtc.nvrtcGetPTX(prog, ptx)) libnvrtc.nvrtcDestroyProgram(ctypes.byref(prog)) - return ptx.value + # For HIP, hipRTC generates raw CO binaries instead of PTX, + # and for some reason, ".value" causes the string to be truncated, + # likely due to the presence of '\0' in the string. So we use .raw instead. 
+ return ptx.raw if torch.version.hip else ptx.value class _CudaModule: @@ -194,9 +252,9 @@ def __getattr__(self, name: str) -> "_CudaKernel": return self._kernels[name] # Import the CUDA library inside the method - from torch.cuda._utils import _get_cuda_library + from torch.cuda._utils import _get_gpu_runtime_library - libcuda = _get_cuda_library() + libcuda = _get_gpu_runtime_library() func = ctypes.c_void_p() try: @@ -244,7 +302,7 @@ def __call__( """ import torch - libcuda = torch.cuda._utils._get_cuda_library() + libcuda = torch.cuda._utils._get_gpu_runtime_library() if not args: args = [] @@ -326,11 +384,21 @@ def set_shared_memory_config(self, shared_mem_bytes: int) -> None: self._max_shared_mem_bytes = shared_mem_bytes return - libcuda = _get_cuda_library() + libcuda = _get_gpu_runtime_library() # Get device properties to validate against limits device_props = torch.cuda.get_device_properties() - max_shared_mem = getattr(device_props, "shared_memory_per_block_optin", 49152) + # HIP doesn't have shared_memory_per_block_optin in device properties, so we hard-code it here + if torch.version.hip: + # navi, CDNA1-CDNA3 allows a max of 64KB shared memory + # CDNA4 allows a max of 160KB shared memory + max_shared_mem = ( + 65536 if device_props.gcnArchName not in ["gfx950"] else 160 * 1024 + ) + else: + max_shared_mem = getattr( + device_props, "shared_memory_per_block_optin", 49152 + ) if shared_mem_bytes > max_shared_mem: raise RuntimeError( @@ -372,7 +440,7 @@ def _cuda_load_module( import torch.cuda # Load CUDA driver library - libcuda = _get_cuda_library() + libcuda = _get_gpu_runtime_library() # Convert PTX to bytes if it's a string if isinstance(ptx, str): From 501e19137ada7acaa2de8c89946f67b87711da10 Mon Sep 17 00:00:00 2001 From: Avik Chaudhuri Date: Fri, 12 Sep 2025 00:33:35 +0000 Subject: [PATCH 136/693] fix var args for shape guards (#162633) Summary: Fixes #162599 Test Plan: added test based on repro Rollback Plan: Differential Revision: D82144520 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162633 Approved by: https://github.com/tugsbayasgalan --- test/export/test_export.py | 41 +++++++++++++++++ torch/export/_unlift.py | 90 +++++++++++++++++++++++++++----------- 2 files changed, 105 insertions(+), 26 deletions(-) diff --git a/test/export/test_export.py b/test/export/test_export.py index c35cd8fee3852..b4596eab95baf 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -4854,6 +4854,47 @@ def forward(self, x, repeat): exported = export(model, inputs).module() self.assertEqual(model(*inputs), exported(*inputs)) + def test_dynamic_shapes_wrapped_with_shape_guards(self): + class Neuron(torch.nn.Module): + def __init__(self, n_dims: int = 5, n_targets: int = 3): + super().__init__() + self.linear = torch.nn.Linear(n_dims, n_targets) + + def forward(self, x, y): + return torch.sigmoid(self.linear(x + y)) + + args = (torch.randn(2, 5), torch.randn(2, 5)) + batch = torch.export.Dim.DYNAMIC + + n = Neuron() + compiled = export(n, args, dynamic_shapes=({0: batch}, {0: batch})) + expected = n(*args) + mod = compiled.module() + got = mod(*args) + self.assertTrue(torch.allclose(expected, got)) + + class Wrapped(Neuron): + def forward(self, *args): + return super().forward(*args) + + w = Wrapped() + + if is_retracebility_test(self._testMethodName): + with self.assertRaisesRegex( + torch._dynamo.exc.UserError, + "Detected mismatch between the structure of `inputs` and `dynamic_shapes`" + ": `inputs` has 2 elements, but `dynamic_shapes` has 1 
elements", + ): + export(w, args, dynamic_shapes={"args": ({0: batch}, {0: batch})}) + else: + compiled = export( + w, args, dynamic_shapes={"args": ({0: batch}, {0: batch})} + ) + expected = w(*args) + mod = compiled.module() + got = mod(*args) + self.assertTrue(torch.allclose(expected, got)) + def test_dynamic_shapes_builder_basic(self): class M(torch.nn.Module): def forward(self, x, y, z): diff --git a/torch/export/_unlift.py b/torch/export/_unlift.py index 3b6f2f0266f69..ae4c09b7c8c2f 100644 --- a/torch/export/_unlift.py +++ b/torch/export/_unlift.py @@ -82,6 +82,64 @@ def _check_inputs_match(args, kwargs, in_spec: pytree.TreeSpec) -> list: return flat_args_with_path +def _force_ep_signature_match(ep_guards_code: list[str], input_paths): + # TODO (tmanlaibaatar) + # This is band-aid solution to export new tracer replacing + # shape env sources to flat_args. The real fix should be replacing + # shape env sources to original user sources but this is quite + # involved because you need to carefully construct new sources using + # dynamo and replace all instances of it inside shape env. But it is + # lot easier to manipulate after we turn them into strings and only + # time we use these guards is during retracing or running exported program, + # so it is probably ok to have "not useful" guards on ep for now. + name_mapping = {} + for idx, path in enumerate(input_paths): + name_mapping[f"L['flat_args'][{idx}]"] = f"L{pytree.keystr(path)}" + + new_guards_code = [] + for guard in ep_guards_code: + for old_name, new_name in name_mapping.items(): + guard = guard.replace(old_name, new_name) + new_guards_code.append(guard) + + return new_guards_code + + +def _force_gm_signature_match(ep_guards_code: list[str], signature): + """ + The signature of the originally exported module may not match + the signature of the unlifted graph module extracted from the + exported program. The guards code extracted from the exported + program is based on the former, but the generated guards fn is + based on the latter; thus we need to reconcile any such diff. + """ + + import re + + # Handle case where signatures may differ in var args. + orig_arg_names = set() + for g in ep_guards_code: + # match substrings of the form L[''][] + orig_arg_names.update(re.findall(r"L\[\'([^\']+)\'\]\[([0-9]+)\]", g)) + + sig_arg_names = set() + for n in signature.parameters: + # match substrings of the form _ + sig_arg_names.update(re.findall(r"(.+)_([0-9]+)", n)) + + # replace L[''][] with L['_'] + new_guards_code = ep_guards_code + for match in orig_arg_names: + if match in sig_arg_names: + base, idx = match + new_guards_code = [ + g.replace(f"L['{base}'][{idx}]", f"L['{base}_{idx}']") + for g in new_guards_code + ] + + return new_guards_code + + def _convert_guards_code_to_fn( guards_code: list[str], paths_of_placeholders: list[pytree.KeyPath], @@ -740,35 +798,15 @@ def _unlift_exported_program_lifted_states( graph = unlift_gm.graph placeholders = graph.find_nodes(op="placeholder") if check_guards and placeholders and ep.example_inputs: - sig = inspect.signature(unlift_gm.forward) - input_paths = _get_input_paths( - ep.example_inputs, - sig, - ) - - # TODO (tmanlaibaatar) - # This is band-aid solution to export new tracer replacing - # shape env sources to flat_args. The real fix should be replacing - # shape env sources to original user sources but this is quite - # involved because you need to carefully construct new sources using - # dynamo and replace all instances of it inside shape env. 
But it is - # lot easier to manipulate after we turn them into strings and only - # time we use these guards is during retracing or running exported program, - # so it is probably ok to have "not useful" guards on ep for now. - name_mapping = {} - for idx, path in enumerate(input_paths): - name_mapping[f"L['flat_args'][{idx}]"] = f"L{pytree.keystr(path)}" - - ep_guards = [] - for guard in ep._guards_code: - for old_name, new_name in name_mapping.items(): - guard = guard.replace(old_name, new_name) - ep_guards.append(guard) - + gm_sig = inspect.signature(unlift_gm.forward) + input_paths = _get_input_paths(ep.example_inputs, gm_sig) guards_code = _get_input_guards_for_graph( placeholders, ep.range_constraints, input_paths ) - guards_code.extend(ep_guards) + + ep_guards_code = _force_ep_signature_match(ep._guards_code, input_paths) + ep_guards_code = _force_gm_signature_match(ep_guards_code, gm_sig) + guards_code.extend(ep_guards_code) unlift_gm._guards_fn = _convert_guards_code_to_fn(guards_code, input_paths) root_nn_module_stack = torch.fx._utils.first_call_function_nn_module_stack( From 7a9c4d794c5048ee8ba4651f522292dee1116977 Mon Sep 17 00:00:00 2001 From: mengph <15936377392@163.com> Date: Fri, 12 Sep 2025 01:09:12 +0000 Subject: [PATCH 137/693] [BUG]Fixed handle cannot be hit in the cache in the IPC ExpandableSegment (#161885) Fixed the bug that handle cannot be hit in the ipcMemHandle_to_devptr cache in the IPC scenario of ExpandableSegment. Fixes #161884 Pull Request resolved: https://github.com/pytorch/pytorch/pull/161885 Approved by: https://github.com/albanD --- c10/cuda/CUDACachingAllocator.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/c10/cuda/CUDACachingAllocator.cpp b/c10/cuda/CUDACachingAllocator.cpp index 93ac4f7a4c649..1a15495e5bf69 100644 --- a/c10/cuda/CUDACachingAllocator.cpp +++ b/c10/cuda/CUDACachingAllocator.cpp @@ -504,7 +504,16 @@ struct ExpandableSegment { SegmentRange share(SegmentRange range, std::ostream& buf) { auto begin = segmentLeft(range.ptr); auto end = segmentRight(range.ptr + range.size); - ShareHeader header{getpid(), segment_size_, end - begin}; + + // header.pid needs to be padded with 4 bytes and initialized with + // 0 values ​​to avoid random padding of different bytes each time, + // thereby ensuring that the handle can be correctly matched in + // ipcMemHandle_to_devptr. 
+ ShareHeader header{}; + header.pid = getpid(); + header.segment_size = segment_size_; + header.num_handles = end - begin; + buf.write((const char*)&header, sizeof(ShareHeader)); for (auto i : c10::irange(begin, end)) { // NOLINTNEXTLINE(bugprone-unchecked-optional-access) From ae97eb86f793a2465809fa1f31b0e5fdbc3eb2fc Mon Sep 17 00:00:00 2001 From: Gabriel Ferns Date: Fri, 12 Sep 2025 01:36:40 +0000 Subject: [PATCH 138/693] Reland "Fix conv exhaustive autotuning and expand Exhaustive test coverage" (#161957) reland https://github.com/pytorch/pytorch/pull/159387 Pull Request resolved: https://github.com/pytorch/pytorch/pull/161957 Approved by: https://github.com/coconutruben --- test/inductor/test_max_autotune.py | 85 ++++++++++++------- torch/_inductor/kernel/conv.py | 2 + torch/_inductor/template_heuristics/triton.py | 7 ++ 3 files changed, 62 insertions(+), 32 deletions(-) diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index 320bdf3462e64..b0046571e9919 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -103,7 +103,8 @@ def benchmark(self, *args, out): @instantiate_parametrized_tests class TestMaxAutotune(TestCase): @parametrize("dynamic", (False, True)) - def test_max_autotune_mm_plus_mm_zero_size_input(self, dynamic): + @parametrize("search_space", ("DEFAULT", "EXHAUSTIVE")) + def test_max_autotune_mm_plus_mm_zero_size_input(self, dynamic, search_space): """ Make sure autotuning mm_plus_mm with zero-size input works without crashes. """ @@ -117,7 +118,9 @@ def mm_plus_mm(a, b, c, d): c = torch.randn(m, k).to(GPU_TYPE) d = torch.randn(k, n).to(GPU_TYPE) - with config.patch({"max_autotune": True}): + with config.patch( + {"max_autotune": True, "max_autotune_gemm_search_space": search_space} + ): torch.compile(mm_plus_mm, dynamic=dynamic)(a, b, c, d) @unittest.skipIf( @@ -540,7 +543,8 @@ def addmm(x, a, b): with config.patch({"max_autotune": True}): torch.compile(addmm, dynamic=dynamic)(x, a, b) - def test_autotune_conv1x1(self): + @parametrize("search_space", ("DEFAULT", "EXHAUSTIVE")) + def test_autotune_conv1x1(self, search_space): # Assuming input has 3 channels and we want to produce 16 channels as output conv1x1 = ( torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=1) @@ -557,7 +561,11 @@ def test_autotune_conv1x1(self): ) with config.patch( - {"max_autotune": True, "max_autotune_gemm_backends": "TRITON"} + { + "max_autotune": True, + "max_autotune_gemm_backends": "TRITON", + "max_autotune_gemm_search_space": search_space, + } ): @torch.compile() @@ -669,7 +677,9 @@ def f(x, y): self.assertTrue(torch.allclose(act, ref, atol=4 * 1e-3, rtol=4 * 1e-3)) @config.patch(max_autotune=True) - def test_empty_conv_input(self, kernel_size=3): + @parametrize("search_space", ("DEFAULT", "EXHAUSTIVE")) + @parametrize("kernel_size", (1, 3)) + def test_empty_conv_input(self, search_space, kernel_size): x = torch.randn(0, 256, 14, 14, device=GPU_TYPE) weight = torch.randn(256, 256, kernel_size, kernel_size, device=GPU_TYPE) @@ -686,17 +696,15 @@ def f(x, weight): groups=1, ) - opt_f = torch.compile(f) - ref = f(x, weight) - act = opt_f(x, weight) - self.assertTrue(torch.allclose(ref, act, atol=4 * 1e-3, rtol=4 * 1e-3)) - - @config.patch(max_autotune=True) - def test_empty_conv_input_with_1x1_kernel(self): - self.test_empty_conv_input(kernel_size=1) + with config.patch({"max_autotune_gemm_search_space": search_space}): + opt_f = torch.compile(f) + ref = f(x, weight) + act = opt_f(x, weight) + 
self.assertTrue(torch.allclose(ref, act, atol=4 * 1e-3, rtol=4 * 1e-3)) @config.patch(max_autotune_gemm_backends="TRITON") - def test_baddmm(self): + @parametrize("search_space", ("DEFAULT", "EXHAUSTIVE")) + def test_baddmm(self, search_space): class M(torch.nn.Module): def __init__(self): super().__init__() @@ -715,11 +723,12 @@ def forward(self, x): ) mod = M().to(GPU_TYPE) - m_c = torch.compile(mode="max-autotune")(mod) - out, code = run_and_get_code(m_c, x) - self.assertEqual(out, mod(x), atol=2e-3, rtol=2e-3) + with config.patch({"max_autotune_gemm_search_space": search_space}): + m_c = torch.compile(mode="max-autotune")(mod) + out, code = run_and_get_code(m_c, x) + self.assertEqual(out, mod(x), atol=2e-3, rtol=1e-3) - FileCheck().check("triton_tem_fused_baddbmm").run(code[0]) + FileCheck().check("triton_tem_fused_baddbmm").run(code[0]) @config.patch(max_autotune=True) def test_conv1x1_with_free_symbols(self): @@ -854,7 +863,8 @@ def test_cat_max_autotune_extern(self): def test_cat_max_autotune_triton(self): self._test_cat_max_autotune_impl(using_triton_mm=True) - def test_conv_cat(self): + @parametrize("search_space", ("DEFAULT", "EXHAUSTIVE")) + def test_conv_cat(self, search_space): class ToyModel(torch.nn.Module): def __init__(self): super().__init__() @@ -866,24 +876,28 @@ def forward(self, x): x = self.conv(x) return torch.cat((x, x + 1)) - with torch.no_grad(): - m = ToyModel().to(device=GPU_TYPE) - input_tensor = torch.randn(32, 3, 64, 64).to(device=GPU_TYPE) + with config.patch({"max_autotune_gemm_search_space": search_space}): + with torch.no_grad(): + m = ToyModel().to(device=GPU_TYPE) + input_tensor = torch.randn(32, 3, 64, 64).to(device=GPU_TYPE) - # convolution is not currently plannable - m = torch.compile(m, mode="max-autotune-no-cudagraphs") - out, code = run_and_get_code(m, input_tensor) - self.assertEqual(out, m(input_tensor)) + # convolution is not currently plannable + m = torch.compile(m, mode="max-autotune-no-cudagraphs") + out, code = run_and_get_code(m, input_tensor) + self.assertEqual(out, m(input_tensor)) - if not TEST_WITH_ROCM: - FileCheck().check("def triton_poi_fused_add_cat_").run(code[0]) + if not TEST_WITH_ROCM: + FileCheck().check("def triton_poi_fused_add_cat_").run(code[0]) - def test_conv3d(self): + @parametrize("search_space", ("DEFAULT", "EXHAUSTIVE")) + def test_conv3d(self, search_space): fn = torch.nn.functional.conv3d image = torch.randn([1, 3, 8, 16, 32]) filt = torch.randn([3, 3, 7, 7, 7]) - with config.patch({"max_autotune": True}): + with config.patch( + {"max_autotune": True, "max_autotune_gemm_search_space": search_space} + ): expected = fn(image, filt) actual = torch.compile(fn)(image, filt) torch.testing.assert_close(actual, expected, atol=6e-5, rtol=0.001) @@ -2210,8 +2224,9 @@ def mm(a, b): with config.patch({"max_autotune": True, "autotune_in_subproc": True}): torch.compile(mm, dynamic=dynamic)(a, b) + @parametrize("search_space", ("DEFAULT", "EXHAUSTIVE")) @parametrize("dynamic", (False, True)) - def test_max_autotune_addmm(self, dynamic=False): + def test_max_autotune_addmm(self, search_space, dynamic=False): """ Make sure autotuning addmm in sub processes work without crashes. 
""" @@ -2224,7 +2239,13 @@ def addmm(x, a, b): x = torch.randn(100).to(GPU_TYPE) a = torch.randn(100, 10).to(GPU_TYPE) b = torch.randn(10, 100).to(GPU_TYPE) - with config.patch({"max_autotune": True, "autotune_in_subproc": True}): + with config.patch( + { + "max_autotune": True, + "autotune_in_subproc": True, + "max_autotune_gemm_search_space": search_space, + } + ): Y_compiled = torch.compile(addmm, dynamic=dynamic)(x, a, b) Y = addmm(x, a, b) torch.testing.assert_close(Y_compiled, Y, atol=1e-2, rtol=1e-2) diff --git a/torch/_inductor/kernel/conv.py b/torch/_inductor/kernel/conv.py index d6e802d00aaad..1dd682fddaeaf 100644 --- a/torch/_inductor/kernel/conv.py +++ b/torch/_inductor/kernel/conv.py @@ -591,10 +591,12 @@ def channels_last_conv(): conv_configs = V.choices.get_conv_configs(device_type) + dtype_size = x.get_dtype().itemsize for cfg in conv_configs( sympy_product([x.get_size()[0], *x.get_size()[2:]]), out_chan, in_chan, + dtype_size=dtype_size, ): if ndim == 2: conv2d_template.maybe_append_choice( diff --git a/torch/_inductor/template_heuristics/triton.py b/torch/_inductor/template_heuristics/triton.py index f4c836247dff6..6a77276b956c4 100644 --- a/torch/_inductor/template_heuristics/triton.py +++ b/torch/_inductor/template_heuristics/triton.py @@ -1148,6 +1148,13 @@ def __init__(self) -> None: for wpeu in [0, int(8 // num_warps)] ] + def _prune_exhaustive_configs( + self, + configs: list[BaseConfig], + dtype_size: int, + ) -> list[BaseConfig]: + return configs + def _filter_configs(self, configs: list[BaseConfig]) -> list[BaseConfig]: """ ROCm specific filtering From ccb450b190a9c24fc53ca8f120bd1cdf36c312c2 Mon Sep 17 00:00:00 2001 From: Shangdi Yu Date: Fri, 12 Sep 2025 01:42:03 +0000 Subject: [PATCH 139/693] [pre_compile] Add check for cuda and hardware version (#162438) if we detect compiled model is using cuda in meaningful way, we should store information about cuda + hardware Example: `SystemInfo(python_version='3.12.9', torch_version='2.9.0a0+gite02b0e6', cuda_version='12.6', triton_version=(3, 4), gpu_name='NVIDIA PG509-210')` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162438 Approved by: https://github.com/zhxchen17 --- torch/_dynamo/aot_compile.py | 15 +++++- torch/_dynamo/convert_frame.py | 1 + torch/_dynamo/graph_utils.py | 42 +++++++++++++++- torch/_dynamo/package.py | 35 ++++++++----- torch/_dynamo/precompile_context.py | 77 +++++++++++++++++++++++++++++ 5 files changed, 155 insertions(+), 15 deletions(-) diff --git a/torch/_dynamo/aot_compile.py b/torch/_dynamo/aot_compile.py index a454c51a33e35..980c230b7bccd 100644 --- a/torch/_dynamo/aot_compile.py +++ b/torch/_dynamo/aot_compile.py @@ -1,5 +1,6 @@ import abc import builtins +import dataclasses import importlib import inspect import logging @@ -10,7 +11,8 @@ import torch import torch.fx -from torch._dynamo.precompile_context import PrecompileContext +from torch._dynamo.graph_utils import _graph_uses_non_cpu +from torch._dynamo.precompile_context import PrecompileContext, SystemInfo from . 
import convert_frame from .hooks import Hooks @@ -50,6 +52,12 @@ class CompileArtifacts: compiled_fn: SerializableCallable original_code: types.CodeType closure: Optional[tuple[Any, ...]] + use_cuda: bool + system_info: SystemInfo = dataclasses.field(default_factory=SystemInfo.current) + + def check_compatibility(self) -> None: + current_system = SystemInfo.current() + current_system.check_compatibility(self.system_info, self.use_cuda) @dataclass @@ -62,6 +70,8 @@ def guard_check(self, *args: Any, **kwargs: Any) -> bool: return self._artifacts.guard_manager.check(f_locals) def __post_init__(self) -> None: + self._artifacts.check_compatibility() + import_sources = { alias: importlib.import_module(module_name) for alias, module_name in self._artifacts.import_sources.items() @@ -259,6 +269,8 @@ def new_guard_filter_fn( backend_input.graph_module._backend_id = backend_input.backend_id # type: ignore[assignment] output_graph = dynamo_output.tracer_output.output_graph assert output_graph is not None + use_cuda = _graph_uses_non_cpu(output_graph.current_tracer.graph) + import_sources = output_graph.import_sources with ( torch._guards.tracing(TracingContext(backend_input.fake_mode)), @@ -293,6 +305,7 @@ def new_guard_filter_fn( compiled_fn=compiled_fn, original_code=fn.__code__, closure=fn.__closure__, + use_cuda=use_cuda, ) aot_compiled_fn = AOTCompiledFunction(_artifacts=artifacts) return aot_compiled_fn diff --git a/torch/_dynamo/convert_frame.py b/torch/_dynamo/convert_frame.py index ef00676e67748..22c622a57e825 100644 --- a/torch/_dynamo/convert_frame.py +++ b/torch/_dynamo/convert_frame.py @@ -1264,6 +1264,7 @@ def count_args(code: CodeType) -> int: assert check_fn.guards_state is not None package.add_guarded_code(check_fn.guards_state, out_code) package.add_inlined_source(output.tracing_context.traced_code) + package.update_use_cuda(output.current_tracer.graph) compile_id_str = str(compile_id) if compile_id is not None else "Unknown" annotation_str = "Torch-Compiled Region: " + compile_id_str diff --git a/torch/_dynamo/graph_utils.py b/torch/_dynamo/graph_utils.py index 1e54ba95b3883..9e40831772159 100644 --- a/torch/_dynamo/graph_utils.py +++ b/torch/_dynamo/graph_utils.py @@ -1,8 +1,10 @@ from collections import deque -from typing import Any +from typing import Any, Optional +import torch from torch.fx import Graph, map_arg, Node from torch.utils._ordered_set import OrderedSet +from torch.utils._pytree import tree_flatten # flattens with support for slices @@ -75,3 +77,41 @@ def current_path_head() -> Node: pending.append((child, cur_node)) return "no cycle detected" + + +def _graph_uses_non_cpu(graph: Optional[Graph]) -> bool: + if graph is None: + return False + + def _is_non_cpu(x: Any) -> bool: + if isinstance(x, torch.device): + return x.type != "cpu" + if isinstance(x, torch.Tensor): + return x.device.type != "cpu" + return False + + def _flatten_meta(node: Node, key: str) -> list[Any]: + if key not in node.meta: + return [] + flat, _ = tree_flatten(node.meta[key]) + return flat + + for node in graph.nodes: + for key in ("val", "example_value"): + for obj in _flatten_meta(node, key): + if _is_non_cpu(obj): + return True + + # Check for device conversions + if node.op == "call_method": + if node.target == "cuda": + return True + if node.target == "to" and "cuda" in node.args: + return True + + # Check args/kwargs for non-CPU device specs + flat_args, _ = tree_flatten((node.args, node.kwargs)) + for obj in flat_args: + if _is_non_cpu(obj): + return True + return False diff --git 
a/torch/_dynamo/package.py b/torch/_dynamo/package.py index 9aa00a6a9d1e3..7df89a39798f0 100644 --- a/torch/_dynamo/package.py +++ b/torch/_dynamo/package.py @@ -19,7 +19,6 @@ import logging import os import pickle -import platform import shutil import sys import types @@ -30,7 +29,12 @@ import torch import torch._inductor.package from torch._dynamo.exc import PackageError -from torch._dynamo.precompile_context import PrecompileCacheArtifact, PrecompileContext +from torch._dynamo.graph_utils import _graph_uses_non_cpu +from torch._dynamo.precompile_context import ( + PrecompileCacheArtifact, + PrecompileContext, + SystemInfo, +) from torch._inductor.runtime.cache_dir_utils import cache_dir from torch.compiler._cache import CacheArtifactFactory @@ -275,13 +279,18 @@ def _find_code_source(obj: Any) -> Optional[str]: class _DynamoCacheEntry: codes: list[_DynamoCodeCacheEntry] inlined_sources: set[InlinedSource] - python_version: str = platform.python_version() - torch_version: str = torch.__version__ + use_cuda: bool + system_info: SystemInfo = dataclasses.field(default_factory=SystemInfo.current) @property def backend_ids(self) -> set[_BackendId]: return {backend_id for code in self.codes for backend_id in code.backend_ids} + def check_versions(self) -> None: + """Check if the current system is compatible with the system used to create this cache entry.""" + current_system_info = SystemInfo.current() + self.system_info.check_compatibility(current_system_info, self.use_cuda) + @CacheArtifactFactory.register class _DynamoCacheArtifact(PrecompileCacheArtifact[_DynamoCacheEntry]): @@ -369,6 +378,8 @@ def __init__( self._current_entry: Optional[_DynamoCodeCacheEntry] = None self._installed_globals: dict[types.ModuleType, list[str]] = {} + # whether cuda is used + self._use_cuda = False # For debugging/testing purpose only. 
self._cached_backends: dict[_BackendId, Any] = {} @@ -397,14 +408,7 @@ def initialize( assert self._innermost_fn is not None if dynamo is not None: assert isinstance(dynamo, _DynamoCacheEntry) - if dynamo.python_version != platform.python_version(): - raise RuntimeError( - f"Compile package was created with a different Python version: {dynamo.python_version}" - ) - if dynamo.torch_version != torch.__version__: - raise RuntimeError( - f"Compile package was created with a different PyTorch version: {dynamo.torch_version}" - ) + dynamo.check_versions() if not ignore_inlined_sources: for code in dynamo.inlined_sources: m = importlib.import_module(code.module) @@ -534,6 +538,9 @@ def add_inlined_source(self, sources: list[types.CodeType]) -> None: ) ) + def update_use_cuda(self, graph: Optional[torch.fx.Graph]) -> None: + self._use_cuda = _graph_uses_non_cpu(graph) + def bypass_current_entry(self) -> None: assert self._current_entry is not None self._current_entry.bypassed = True @@ -670,7 +677,9 @@ def install(self, backends: dict[_BackendId, Any]) -> None: def cache_entry(self) -> _DynamoCacheEntry: self.validate() return _DynamoCacheEntry( - codes=list(self._codes.values()), inlined_sources=self._inlined_sources + codes=list(self._codes.values()), + inlined_sources=self._inlined_sources, + use_cuda=self._use_cuda, ) @staticmethod diff --git a/torch/_dynamo/precompile_context.py b/torch/_dynamo/precompile_context.py index 38f97e583375d..2ad0e25d258a5 100644 --- a/torch/_dynamo/precompile_context.py +++ b/torch/_dynamo/precompile_context.py @@ -1,12 +1,15 @@ import copy +import dataclasses import logging import pickle +import platform from abc import abstractmethod from collections import defaultdict from itertools import chain from typing import Any, Callable, Generic, Optional, TypeVar, Union from typing_extensions import override +import torch from torch.compiler._cache import ( _serialize_single_cache, CacheArtifact, @@ -17,6 +20,7 @@ ) from torch.utils._appending_byte_serializer import AppendingByteSerializer from torch.utils._ordered_set import OrderedSet +from torch.utils._triton import get_triton_version """ @@ -243,3 +247,76 @@ def _ensure_cache_artifacts_registered(cls) -> None: from torch._functorch._aot_autograd.autograd_cache import ( # noqa: F401 BundledAOTAutogradCacheArtifact, ) + + +@dataclasses.dataclass(frozen=True) +class SystemInfo: + """ + System information including Python, PyTorch, and GPU details. + This information is used to ensure compiled artifacts can only be loaded + with compatible system configurations. + """ + + python_version: str + torch_version: str + cuda_version: Optional[str] + triton_version: Optional[tuple[int, int]] + gpu_name: Optional[str] + + @classmethod + def current(cls) -> "SystemInfo": + """Create a SystemInfo instance with current system information.""" + # Get GPU name if CUDA is available + gpu_name = None + if torch.cuda.is_available(): + try: + gpu_name = torch.cuda.get_device_name() + except Exception: + # If we can't get GPU info, leave as None + pass + + return cls( + python_version=platform.python_version(), + torch_version=torch.__version__, + cuda_version=torch.version.cuda, + triton_version=get_triton_version((0, 0)), + gpu_name=gpu_name, + ) + + def check_compatibility(self, other: "SystemInfo", use_cuda: bool = False) -> None: + """ + Check if this SystemInfo is compatible with another SystemInfo. + Raises RuntimeError if incompatible. 
+ """ + if self.python_version != other.python_version: + raise RuntimeError( + f"Compile package was created with a different Python version: {self.python_version}" + ) + + if self.torch_version != other.torch_version: + raise RuntimeError( + f"Compile package was created with a different PyTorch version: {self.torch_version}" + ) + + if use_cuda: + if not torch.cuda.is_available(): + raise RuntimeError("CUDA is not available") + if self.cuda_version != other.cuda_version: + raise RuntimeError( + f"Compile package was created with a different CUDA version: {self.cuda_version}" + ) + + if ( + other.triton_version != (0, 0) + and self.triton_version != other.triton_version + ): + raise RuntimeError( + f"Compile package was created with a different Triton version: {self.triton_version}" + ) + + # Check GPU name if CUDA was used + if other.gpu_name is not None and self.gpu_name != other.gpu_name: + raise RuntimeError( + f"Compile package was created with different GPU: " + f"cached={self.gpu_name}, current={other.gpu_name}" + ) From 7eb92b076f6bc4c87ff42267ddf4205e4d4de9e2 Mon Sep 17 00:00:00 2001 From: Janani Sriram Date: Fri, 12 Sep 2025 02:12:33 +0000 Subject: [PATCH 140/693] [Inductor][FP8] Validate exhaustive autotuning for FP8 Inductor templates (#162678) Summary: Validate exhaustive autotuning for FP8 Inductor templates: scaled MM templates require `block_k >= 32`. Before, exhaustive autotuning defaulted to a limited set of autotuning configs, as limitations for exhaustively autotuning on FP8 shapes had not been tested. Test Plan: ``` CUDA_VISIBLE_DEVICES=0 TRITON_PRINT_AUTOTUNING=1 TRITON_ALWAYS_COMPILE=1 TORCH_LOGS=+inductor TORCHINDUCTOR_FORCE_DISABLE_CACHES=1 ENABLE_PERSISTENT_TMA_MATMUL=1 TORCHINDUCTOR_MAX_AUTOTUNE_GEMM=1 TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACE=DEFAULT buck2 run mode/{opt,inplace} pytorch/t ritonbench:run -- --op fp8_gemm --only torch_fp8_gemm,pt2_fp8_gemm --metrics tflops,accuracy --input-loader=/home/jananisriram/personal/exhaustive_autotune_rowwise_persistent_tma/json_fi les/rowwise_ptma_0.json --output="/home/jananisriram/personal/exhaustive_autotune_rowwise_persistent_tma/autotune/gpu0_bench.csv" --atol=1e-2 --rtol=0.5 2>&1 | tee ~/personal/exhaustive_ autotune_rowwise_persistent_tma/autotune/gpu0.log ``` Rollback Plan: Differential Revision: D82174075 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162678 Approved by: https://github.com/coconutruben --- torch/_inductor/template_heuristics/triton.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/torch/_inductor/template_heuristics/triton.py b/torch/_inductor/template_heuristics/triton.py index 6a77276b956c4..7fb1541d172ae 100644 --- a/torch/_inductor/template_heuristics/triton.py +++ b/torch/_inductor/template_heuristics/triton.py @@ -1798,6 +1798,15 @@ class ScaledTMAConfigMixin(TMAWorkspaceMixin, BaseScaledMMConfigMixin): This inherits from BaseScaledMMConfigMixin and adds TMA-specific options. 
""" + def _filter_configs(self, configs: list[BaseConfig]) -> list[BaseConfig]: + """ + TMA specific filtering: + - num_warps=2 not safe for TMA + - block_k >= 32 required for TMA (requires inner-most dimension >= 32) + """ + configs = [c for c in configs if c.num_warps != 2 and c.block_k >= 32] + return super()._filter_configs(configs) + def _get_template_configs_impl( self, kernel_inputs: KernelInputs, @@ -1901,11 +1910,10 @@ def __init__(self) -> None: super().__init__() # Override mm_configs to use scaled_mm_configs self.mm_configs = self.scaled_mm_configs - # NOTE: overriding exhaustive configs here to be the same as mm_configs - # as we haven't validated exhaustive support here yet - # TODO(coconutruben): remove this once we have validated exhaustive support - # for scaled_mm - self.exhaustive_configs = self.scaled_mm_configs + + def _filter_configs(self, configs: list[BaseConfig]) -> list[BaseConfig]: + configs = [c for c in configs if c.block_k >= 32] + return super()._filter_configs(configs) @register_template_heuristic( @@ -1920,11 +1928,6 @@ def __init__(self) -> None: super().__init__() # Override mm_configs to use scaled_persistent_mm_configs for TMA self.mm_configs = self.scaled_persistent_mm_configs - # NOTE: overriding exhaustive configs here to be the same as mm_configs - # as we haven't validated exhaustive support here yet - # TODO(coconutruben): remove this once we have validated exhaustive support - # for scaled_mm - self.exhaustive_configs = self.scaled_persistent_mm_configs @register_template_heuristic( From c140bf217f5ca5071ab9dbc1bcf9d4006242f44a Mon Sep 17 00:00:00 2001 From: thenumberouscode Date: Fri, 12 Sep 2025 03:16:23 +0000 Subject: [PATCH 141/693] [indexing] Prevent integer overflow from large step values in C++ (#161707) Fixes https://github.com/pytorch/pytorch/issues/160868 hmmm, I found an existing fix PR after I've finished this one. For reference, the old PR was https://github.com/pytorch/pytorch/pull/147433/files. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/161707 Approved by: https://github.com/leslie-fang-intel, https://github.com/CaoE, https://github.com/mlazos --- aten/src/ATen/native/TensorShape.cpp | 2 +- test/inductor/test_torchinductor.py | 25 +++++++++++++++++++++++++ torch/_decomp/decompositions.py | 3 ++- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index c2d0856c3cd4c..aea88dd1cc94b 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -3063,7 +3063,7 @@ Tensor slice( } auto storage_offset = self.storage_offset() + start_val * strides[dim]; auto len = end_val - start_val; - sizes[dim] = (len + step - 1) / step; // round-up + sizes[dim] = (len / step) + (len % step != 0); // safely round-up strides[dim] *= step; Tensor result; diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py index 6a6e3c674179b..ac5b538189b38 100644 --- a/test/inductor/test_torchinductor.py +++ b/test/inductor/test_torchinductor.py @@ -4309,6 +4309,31 @@ def fn(x, y): self.assertEqual(torch.compile(fn)(x1, y), fn(x1, y)) self.assertEqual(torch.compile(fn)(x2, y), fn(x2, y)) + def test_slice_copy(self): + class Model(nn.Module): + def __init__(self, start=449, step=(2**63 - 1)): + super().__init__() + self.start = start + self.step = step + + def forward(self, x: torch.Tensor): + sliced = torch.slice_copy( + x, dim=0, start=self.start, end=None, step=self.step + ) + return torch.reciprocal(sliced) + + with config.patch({"implicit_fallbacks": True}): + # bad case + self.common( + Model(), + (torch.randn(875),), + ) + # normal case + self.common( + Model(step=10), + (torch.randn(875),), + ) + def test_slice1(self): def fn(a): return ( diff --git a/torch/_decomp/decompositions.py b/torch/_decomp/decompositions.py index ba09c6173c5f3..1918373b342e3 100644 --- a/torch/_decomp/decompositions.py +++ b/torch/_decomp/decompositions.py @@ -759,7 +759,8 @@ def slice_forward( storage_offset = self.storage_offset() + start_val * strides[dim] len = end_val - start_val - sizes[dim] = (len + step - 1) // step + # safely round-up for corresponding c++ impl + sizes[dim] = (len // step) + (1 if len % step != 0 else 0) strides[dim] *= step if self.is_quantized: From 222ec8d28ecd46a2c3ecfc660937ec640275c167 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Fri, 12 Sep 2025 03:39:13 +0000 Subject: [PATCH 142/693] Revert "AMD CPU CI - Add freezing + fix label trigger (#162176)" This reverts commit 9cac1b92595ec7836101d51dbe1415081042c7a0. Reverted https://github.com/pytorch/pytorch/pull/162176 on behalf of https://github.com/huydhn due to Sorry for reverting this but hardcoding the input online 122 does not make sense ([comment](https://github.com/pytorch/pytorch/pull/162176#issuecomment-3283532452)) --- .../workflows/inductor-perf-test-nightly-x86-zen.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml index d6a1c95ad4007..170de752ab875 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml @@ -43,11 +43,6 @@ on: required: false type: boolean default: false - freezing: - description: Run freezing? 
- required: false - type: boolean - default: true benchmark_configs: description: The list of configs used the benchmark required: false @@ -107,7 +102,7 @@ jobs: if: github.event.schedule == '0 7 * * *' with: build-environment: linux-jammy-py3.9-gcc11-build - dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true + dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} timeout-minutes: 720 @@ -121,9 +116,10 @@ jobs: name: inductor-test uses: ./.github/workflows/_linux-test.yml needs: inductor-build + if: github.event_name == 'workflow_dispatch' with: build-environment: linux-jammy-py3.9-gcc11-build - dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true + dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} timeout-minutes: 720 From 3cd734584d4a5c7c03ab6e58abe1469a06b790a1 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 12 Sep 2025 03:41:39 +0000 Subject: [PATCH 143/693] bring back the old vllm's use_existing_torch.py (#162747) vllm's pr will override our dependencies for torch. quick fix to add the use_existing_torch.py. syncing with vllm now regarding the uv approach they have Pull Request resolved: https://github.com/pytorch/pytorch/pull/162747 Approved by: https://github.com/huydhn --- .ci/lumen_cli/cli/lib/core/vllm/vllm_build.py | 11 +++++++++++ .ci/lumen_cli/cli/lib/core/vllm/vllm_test.py | 13 ++++++++++++- .github/ci_commit_pins/vllm.txt | 2 +- .github/ci_configs/vllm/use_existing_torch.py | 17 +++++++++++++++++ 4 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 .github/ci_configs/vllm/use_existing_torch.py diff --git a/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py b/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py index 8db48065cb052..415e05d07551b 100644 --- a/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py +++ b/.ci/lumen_cli/cli/lib/core/vllm/vllm_build.py @@ -66,6 +66,11 @@ class VllmBuildParameters: "DOCKERFILE_PATH", ".github/ci_configs/vllm/Dockerfile.tmp_vllm" ) + # the cleaning script to remove torch dependencies from pip + cleaning_script: Path = env_path_field( + "cleaning_script", ".github/ci_configs/vllm/use_existing_torch.py" + ) + # OUTPUT_DIR: where docker buildx (local exporter) will write artifacts output_dir: Path = env_path_field("OUTPUT_DIR", "external/vllm") @@ -160,6 +165,7 @@ def run(self): logger.info("Running vllm build with inputs: %s", inputs) vllm_commit = clone_vllm() + self.cp_torch_cleaning_script(inputs) self.cp_dockerfile_if_exist(inputs) # cp torch wheels from root direct to vllm workspace if exist self.cp_torch_whls_if_exist(inputs) @@ -205,6 +211,11 @@ def cp_torch_whls_if_exist(self, inputs: VllmBuildParameters) -> str: copy(inputs.torch_whls_path, tmp_dir) return tmp_dir + def cp_torch_cleaning_script(self, inputs: VllmBuildParameters): + script = get_path(inputs.cleaning_script, resolve=True) + vllm_script = Path(f"./{self.work_directory}/use_existing_torch.py") + copy(script, vllm_script) + def cp_dockerfile_if_exist(self, inputs: 
VllmBuildParameters): if not inputs.use_local_dockerfile: logger.info("using vllm default dockerfile.torch_nightly for build") diff --git a/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py b/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py index 76401e33f29fd..224f078788702 100644 --- a/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py +++ b/.ci/lumen_cli/cli/lib/core/vllm/vllm_test.py @@ -11,7 +11,7 @@ from cli.lib.common.cli_helper import BaseRunner from cli.lib.common.envs_helper import env_path_field, env_str_field, get_env -from cli.lib.common.path_helper import copy, remove_dir +from cli.lib.common.path_helper import copy, get_path, remove_dir from cli.lib.common.pip_helper import ( pip_install_first_match, pip_install_packages, @@ -43,6 +43,10 @@ class VllmTestParameters: torch_cuda_arch_list: str = env_str_field("TORCH_CUDA_ARCH_LIST", "8.9") + cleaning_script: Path = env_path_field( + "cleaning_script", ".github/ci_configs/vllm/use_existing_torch.py" + ) + def __post_init__(self): if not self.torch_whls_path.exists(): raise ValueError("missing torch_whls_path") @@ -92,11 +96,13 @@ def prepare(self): self._set_envs(params) clone_vllm(dst=self.work_directory) + self.cp_torch_cleaning_script(params) with working_directory(self.work_directory): remove_dir(Path("vllm")) self._install_wheels(params) self._install_dependencies() # verify the torches are not overridden by test dependencies + check_versions() def run(self): @@ -125,6 +131,11 @@ def run(self): # double check the torches are not overridden by other packages check_versions() + def cp_torch_cleaning_script(self, params: VllmTestParameters): + script = get_path(params.cleaning_script, resolve=True) + vllm_script = Path(f"./{self.work_directory}/use_existing_torch.py") + copy(script, vllm_script) + def _install_wheels(self, params: VllmTestParameters): logger.info("Running vllm test with inputs: %s", params) if not pkg_exists("torch"): diff --git a/.github/ci_commit_pins/vllm.txt b/.github/ci_commit_pins/vllm.txt index e4ac57f1eb501..450e7ae4d2adb 100644 --- a/.github/ci_commit_pins/vllm.txt +++ b/.github/ci_commit_pins/vllm.txt @@ -1 +1 @@ -cc99baf14dacc2497d0c5ed84e076ef2c37f6a4d +f510715882304796a96e33028b4f6de1b026c2c7 diff --git a/.github/ci_configs/vllm/use_existing_torch.py b/.github/ci_configs/vllm/use_existing_torch.py new file mode 100644 index 0000000000000..f55db97850d9c --- /dev/null +++ b/.github/ci_configs/vllm/use_existing_torch.py @@ -0,0 +1,17 @@ +import glob + + +requires_files = glob.glob("requirements/*.txt") +requires_files += ["pyproject.toml"] +for file in requires_files: + print(f">>> cleaning {file}") + with open(file) as f: + lines = f.readlines() + if "torch" in "".join(lines).lower(): + print("removed:") + with open(file, "w") as f: + for line in lines: + if "torch" not in line.lower(): + f.write(line) + print(f"<<< done cleaning {file}") + print() From e8eeb060348f250975124abb957b1d7d9c4af9a0 Mon Sep 17 00:00:00 2001 From: atalman Date: Fri, 12 Sep 2025 03:43:03 +0000 Subject: [PATCH 144/693] Move inductor jobs 3.9->3.10 (#162323) Related to: https://github.com/pytorch/pytorch/issues/161167 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162323 Approved by: https://github.com/huydhn, https://github.com/Skylion007 Co-authored-by: Huy Do --- .ci/docker/build.sh | 3 +-- .ci/pytorch/test.sh | 5 ----- .github/workflows/inductor-nightly.yml | 4 ++-- .github/workflows/inductor-perf-test-nightly-x86-zen.yml | 6 +++--- .github/workflows/inductor-perf-test-nightly-x86.yml | 6 +++--- 
.github/workflows/inductor-periodic.yml | 4 ++-- .github/workflows/inductor-unittest.yml | 4 ++-- .github/workflows/inductor.yml | 4 ++-- .github/workflows/operator_benchmark.yml | 6 +++--- .github/workflows/trunk.yml | 4 ++-- .../cpu_inductor_amp_freezing_torchbench_inference.csv | 4 ++-- .../cpu_inductor_freezing_torchbench_inference.csv | 4 ++-- .../cpu_inductor_torchbench_inference.csv | 4 ++-- .../dynamic_cpu_inductor_torchbench_inference.csv | 4 ++-- ..._autotune_inductor_amp_freezing_torchbench_inference.csv | 4 ++-- .../rocm/aot_eager_torchbench_inference.csv | 4 ++-- .../rocm/dynamic_aot_eager_torchbench_inference.csv | 4 ++-- .../rocm/dynamic_inductor_torchbench_inference.csv | 4 ++-- .../rocm/dynamo_eager_torchbench_inference.csv | 4 ++-- .../rocm/inductor_torchbench_inference.csv | 4 ++-- 20 files changed, 40 insertions(+), 46 deletions(-) diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 89967cef96b12..be85fdcb542d0 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -214,8 +214,7 @@ case "$tag" in TRITON=yes ;; pytorch-linux-jammy-py3-gcc11-inductor-benchmarks) - # TODO (huydhn): Upgrade this to Python >= 3.10 - ANACONDA_PYTHON_VERSION=3.9 + ANACONDA_PYTHON_VERSION=3.10 GCC_VERSION=11 VISION=yes KATEX=yes diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh index e0d47259676b7..fa6d0aeeb2a4e 100755 --- a/.ci/pytorch/test.sh +++ b/.ci/pytorch/test.sh @@ -1721,11 +1721,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then elif [[ "${TEST_CONFIG}" == *inductor* ]]; then install_torchvision test_inductor_shard "${SHARD_NUMBER}" - if [[ "${SHARD_NUMBER}" == 1 ]]; then - if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then - test_inductor_distributed - fi - fi elif [[ "${TEST_CONFIG}" == *einops* ]]; then test_einops elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then diff --git a/.github/workflows/inductor-nightly.yml b/.github/workflows/inductor-nightly.yml index fe0f102406b6a..78602e05586b7 100644 --- a/.github/workflows/inductor-nightly.yml +++ b/.github/workflows/inductor-nightly.yml @@ -37,7 +37,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" test-matrix: | @@ -56,7 +56,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: nightly-dynamo-benchmarks-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.nightly-dynamo-benchmarks-build.outputs.docker-image }} test-matrix: ${{ needs.nightly-dynamo-benchmarks-build.outputs.test-matrix }} timeout-minutes: 720 diff --git a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml index 170de752ab875..a9a839df61af2 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml @@ -75,7 +75,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -101,7 +101,7 @@ jobs: needs: inductor-build if: github.event.schedule == 
'0 7 * * *' with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} @@ -118,7 +118,7 @@ jobs: needs: inductor-build if: github.event_name == 'workflow_dispatch' with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} diff --git a/.github/workflows/inductor-perf-test-nightly-x86.yml b/.github/workflows/inductor-perf-test-nightly-x86.yml index f894b8fdc6e03..0533184df2e0e 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86.yml @@ -80,7 +80,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -107,7 +107,7 @@ jobs: needs: inductor-build if: github.event.schedule == '0 7 * * *' with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} @@ -124,7 +124,7 @@ jobs: needs: inductor-build if: github.event_name == 'workflow_dispatch' with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }}-freezing-${{ inputs.freezing }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml index 21d965eaeaada..e2395087326a2 100644 --- a/.github/workflows/inductor-periodic.yml +++ b/.github/workflows/inductor-periodic.yml @@ -154,7 +154,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" test-matrix: | @@ -200,7 +200,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: periodic-dynamo-benchmarks-cpu-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.periodic-dynamo-benchmarks-cpu-build.outputs.test-matrix }} secrets: inherit diff --git 
a/.github/workflows/inductor-unittest.yml b/.github/workflows/inductor-unittest.yml index 2125a8559363b..6ab276a57fc4d 100644 --- a/.github/workflows/inductor-unittest.yml +++ b/.github/workflows/inductor-unittest.yml @@ -110,7 +110,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" test-matrix: | @@ -127,7 +127,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: inductor-cpu-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/inductor.yml b/.github/workflows/inductor.yml index 4189d24a7b14f..2616141c0dc2a 100644 --- a/.github/workflows/inductor.yml +++ b/.github/workflows/inductor.yml @@ -79,7 +79,7 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" test-matrix: | @@ -101,7 +101,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: inductor-cpu-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.inductor-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/operator_benchmark.yml b/.github/workflows/operator_benchmark.yml index aaf32c160f0dc..dd262d31b8fc2 100644 --- a/.github/workflows/operator_benchmark.yml +++ b/.github/workflows/operator_benchmark.yml @@ -29,7 +29,7 @@ jobs: name: opbenchmark-build uses: ./.github/workflows/_linux-build.yml with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -42,7 +42,7 @@ jobs: name: opbenchmark-on-demand-build uses: ./.github/workflows/_linux-build.yml with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -55,7 +55,7 @@ jobs: uses: ./.github/workflows/_linux-test.yml needs: opbenchmark-build with: - build-environment: linux-jammy-py3.9-gcc11-build + build-environment: linux-jammy-py3.10-gcc11-build docker-image: ${{ needs.opbenchmark-build.outputs.docker-image }} test-matrix: ${{ needs.opbenchmark-build.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 4dd465d70803d..5b1a12812003f 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -240,7 +240,7 @@ jobs: needs: get-label-type with: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-jammy-py3.9-gcc11 + build-environment: linux-jammy-py3.10-gcc11 docker-image-name: ci-image:pytorch-linux-jammy-py3-gcc11-inductor-benchmarks test-matrix: | { include: [ @@ -255,7 +255,7 @@ jobs: - 
verify-cachebench-cpu-build - target-determination with: - build-environment: linux-jammy-py3.9-gcc11 + build-environment: linux-jammy-py3.10-gcc11 docker-image: ${{ needs.verify-cachebench-cpu-build.outputs.docker-image }} test-matrix: ${{ needs.verify-cachebench-cpu-build.outputs.test-matrix }} secrets: inherit diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv index e68aa2fa5351f..a4dbaeb7b546d 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv index aec659fdcd654..885029ba8c56e 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv index 4f2eec1493520..aa7a3161afcc6 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv index c8db4d5823203..f26dea6f692ef 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv @@ -82,11 +82,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv index f4c9ffddd9974..39149853947c3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_max_autotune_inductor_amp_freezing_torchbench_inference.csv @@ -98,11 +98,11 @@ dlrm,pass,0 -doctr_det_predictor,pass,5 +doctr_det_predictor,pass,3 -doctr_reco_predictor,pass,4 +doctr_reco_predictor,pass,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv index 6f316b219bb92..bf70642a855ef 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv +++ 
b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv index 4b5138ce9c367..e019365ccbfdb 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv index a3fc7cf192371..fed8ebded682c 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv index 6f316b219bb92..bf70642a855ef 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv index 8ccf95da9659e..014e23e41cb31 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_torchbench_inference.csv @@ -106,11 +106,11 @@ dlrm,pass,0 -doctr_det_predictor,eager_fail_to_run,5 +doctr_det_predictor,eager_fail_to_run,3 -doctr_reco_predictor,eager_fail_to_run,4 +doctr_reco_predictor,eager_fail_to_run,1 From 872ed60679b675d90110ee5c23509107c1825b51 Mon Sep 17 00:00:00 2001 From: Daniel Vega-Myhre Date: Fri, 12 Sep 2025 03:51:49 +0000 Subject: [PATCH 145/693] [mxfp8 torch._scaled_grouped_mm] fix meta registration for 3d tensor (#162765) Meta registration checks for torch._scaled_grouped_mm has a bug for 3d "B" tensors. Namely, the scale shape for such a tensor should be 2d with shape (G, blocked_K * blocked_N), but it currently enforces an expected 3d shape of (G, blocked_K, blocked_N). See Blas.cpp for correct validation logic [here](https://github.com/pytorch/pytorch/blob/8e217a9f6dc81e3d12697b04c3e611d82d9d866a/aten/src/ATen/native/cuda/Blas.cpp#L1622). 
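
A minimal sketch of the scale-shape rule described above, for the 3d "B" tensor case. This mirrors the logic in the patched meta check (block size 32 along K, blocked_K rounded up to a multiple of 4, blocked_N rounded up to a multiple of 128, last two dims flattened); the helper name, round_up implementation, and example sizes are illustrative only, not the actual PyTorch internals.

import math

def round_up(x: float, multiple: int) -> int:
    # Round x up to the nearest positive multiple of `multiple`.
    return int(math.ceil(x / multiple) * multiple)

def expected_mxfp8_scale_shape_for_3d_rhs(mat_shape: tuple) -> tuple:
    # For a 3d "B" tensor of shape (G, K, N), the mxfp8 scales are expected
    # to be 2d with shape (G, blocked_K * blocked_N), not 3d
    # (G, blocked_K, blocked_N) as the old meta check enforced.
    G, K, N = mat_shape
    block_size = 32                       # mxfp8 block size along K
    blocked_K = round_up(K / block_size, 4)
    blocked_N = round_up(N, 128)
    return (G, blocked_K * blocked_N)

if __name__ == "__main__":
    # Hypothetical sizes, purely for illustration:
    # K=256 -> blocked_K=8, N=512 -> blocked_N=512, so scales are (8, 4096).
    print(expected_mxfp8_scale_shape_for_3d_rhs((8, 256, 512)))
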
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162765 Approved by: https://github.com/ngimel --- torch/_meta_registrations.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/torch/_meta_registrations.py b/torch/_meta_registrations.py index 8b8c2c16b9c77..9f2c9d1c44d66 100644 --- a/torch/_meta_registrations.py +++ b/torch/_meta_registrations.py @@ -7547,18 +7547,18 @@ def check_scale(scale_name, scale, mat, scaled_dim, scale_multiplier=1): # scale sizes at compile time. if is_mxfp8: torch._check( - mat.ndim == scale.ndim, - lambda: f"For MXFP8, scale should have same number of dimensions as target tensor, but {scale_name} has mat.ndim={mat.ndim} and scale.ndim={scale.ndim}", # noqa: B950 + scale.ndim == mat.ndim - 1, + lambda: f"For MXFP8, 3d tensor should have 2d scales, but {scale_name} has mat.ndim={mat.ndim} and scale.ndim={scale.ndim}", # noqa: B950 ) # TODO: This logic only holds for RHS tensor in 2d-3d case. # We'll need to update it to handle LHS 3d tensor in 3d-2d and 3d-3d cases. - G, K, N = scale.shape + G, K, N = mat.shape block_size = 32 blocked_K = round_up(K / block_size, 4) blocked_N = round_up(N, 128) torch._check( - mat.shape[-2] == blocked_K and mat.shape[-1] == blocked_N, - lambda: f"For MXFP8, expected mat.shape={mat.shape} to have scale shape of ({G},{blocked_K},{blocked_N}), but got {scale.shape}", # noqa: B950 + scale.shape[0] == G and scale.shape[1] == blocked_K * blocked_N, + lambda: f"For MXFP8, expected mat.shape={mat.shape} to have scale shape of ({G},{blocked_K * blocked_N}), but got {scale.shape}", # noqa: B950 ) else: torch._check( From 31345fb4f7a766a12976a4e396fe31a0498e319f Mon Sep 17 00:00:00 2001 From: Klaus Zimmermann Date: Thu, 11 Sep 2025 14:27:34 +0200 Subject: [PATCH 146/693] Make functorch notebook symlinks PEP 517 valid (#157813) Pull Request resolved: https://github.com/pytorch/pytorch/pull/157813 Approved by: https://github.com/zou3519, https://github.com/atalman --- .flake8 | 2 +- .lintrunner.toml | 3 +-- functorch/COMPILE_README.md | 2 +- functorch/docs/source/conf.py | 4 ++-- functorch/docs/source/index.rst | 14 +++++++------- functorch/docs/source/notebooks | 1 - .../source/tutorials}/_src/plot_ensembling.py | 0 .../tutorials}/_src/plot_jacobians_and_hessians.py | 0 .../tutorials}/_src/plot_per_sample_gradients.py | 0 .../tutorials}/aot_autograd_optimizations.ipynb | 2 +- .../source/tutorials}/ensembling.ipynb | 2 +- .../source/tutorials}/jacobians_hessians.ipynb | 2 +- .../source/tutorials}/minifier.ipynb | 0 .../source/tutorials}/neural_tangent_kernels.ipynb | 2 +- .../source/tutorials}/per_sample_grads.ipynb | 2 +- .../source/tutorials}/whirlwind_tour.ipynb | 2 +- pyproject.toml | 2 +- 17 files changed, 19 insertions(+), 21 deletions(-) delete mode 120000 functorch/docs/source/notebooks rename functorch/{notebooks => docs/source/tutorials}/_src/plot_ensembling.py (100%) rename functorch/{notebooks => docs/source/tutorials}/_src/plot_jacobians_and_hessians.py (100%) rename functorch/{notebooks => docs/source/tutorials}/_src/plot_per_sample_gradients.py (100%) rename functorch/{notebooks => docs/source/tutorials}/aot_autograd_optimizations.ipynb (99%) rename functorch/{notebooks => docs/source/tutorials}/ensembling.ipynb (99%) rename functorch/{notebooks => docs/source/tutorials}/jacobians_hessians.ipynb (99%) rename functorch/{notebooks => docs/source/tutorials}/minifier.ipynb (100%) rename functorch/{notebooks => docs/source/tutorials}/neural_tangent_kernels.ipynb (99%) rename 
functorch/{notebooks => docs/source/tutorials}/per_sample_grads.ipynb (99%) rename functorch/{notebooks => docs/source/tutorials}/whirlwind_tour.ipynb (99%) diff --git a/.flake8 b/.flake8 index fc9ab167fbeef..fa73b7b880fd3 100644 --- a/.flake8 +++ b/.flake8 @@ -73,7 +73,7 @@ exclude = ./docs/src, ./functorch/docs, ./functorch/examples, - ./functorch/notebooks, + ./functorch/docs/source/tutorials, ./scripts, ./test/generated_type_hints_smoketest.py, ./third_party, diff --git a/.lintrunner.toml b/.lintrunner.toml index 944829fa38977..1f79f1eb971de 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -13,7 +13,7 @@ exclude_patterns = [ '**/fb/**', 'functorch/docs/**', 'functorch/examples/**', - 'functorch/notebooks/**', + 'functorch/docs/source/tutorials/**', 'torch/_inductor/fx_passes/serialized_patterns/**', 'torch/_inductor/autoheuristic/artifacts/**', 'scripts/**', @@ -1568,7 +1568,6 @@ include_patterns = [ exclude_patterns = [ 'caffe2/**', 'functorch/docs/**', - 'functorch/notebooks/**', 'torch/_inductor/fx_passes/serialized_patterns/**', 'torch/_inductor/autoheuristic/artifacts/**', 'test/dynamo/cpython/**', diff --git a/functorch/COMPILE_README.md b/functorch/COMPILE_README.md index 964cda6fbec0e..47ca484e7c07e 100644 --- a/functorch/COMPILE_README.md +++ b/functorch/COMPILE_README.md @@ -72,4 +72,4 @@ aot_function(f, ts_compiler, ts_compiler)(torch.randn(3, requires_grad=True)) * Min-cut [recomputation](https://dev-discuss.pytorch.org/t/min-cut-optimal-recomputation-i-e-activation-checkpointing-with-aotautograd/467) with AOT Autograd. ## Tutorials -You can use this [tutorial](https://pytorch.org/functorch/nightly/notebooks/aot_autograd_optimizations.html) to play with AOT Autograd. +You can use this [tutorial](https://pytorch.org/functorch/nightly/tutorials/aot_autograd_optimizations.html) to play with AOT Autograd. diff --git a/functorch/docs/source/conf.py b/functorch/docs/source/conf.py index 4a47b74f81f37..749a8435c03bc 100644 --- a/functorch/docs/source/conf.py +++ b/functorch/docs/source/conf.py @@ -50,7 +50,7 @@ "myst_nb", ] -# sys.path.insert(0, os.path.abspath('./notebooks')) +# sys.path.insert(0, os.path.abspath('./tutorials')) # build the templated autosummary files # autosummary_generate = True @@ -131,7 +131,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ["notebooks/colab**", "notebooks/_src/**"] +exclude_patterns = ["tutorials/colab**", "tutorials/_src/**"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" diff --git a/functorch/docs/source/index.rst b/functorch/docs/source/index.rst index 5f988524fb0e3..894fb9995afe5 100644 --- a/functorch/docs/source/index.rst +++ b/functorch/docs/source/index.rst @@ -55,7 +55,7 @@ Check out our `whirlwind tour `_ or some of our tutorials mentio :caption: functorch: Getting Started install - notebooks/whirlwind_tour.ipynb + tutorials/whirlwind_tour.ipynb ux_limitations .. 
toctree:: @@ -70,9 +70,9 @@ Check out our `whirlwind tour `_ or some of our tutorials mentio :maxdepth: 1 :caption: functorch Tutorials - notebooks/jacobians_hessians.ipynb - notebooks/ensembling.ipynb - notebooks/per_sample_grads.ipynb - notebooks/neural_tangent_kernels.ipynb - notebooks/aot_autograd_optimizations.ipynb - notebooks/minifier.ipynb + tutorials/jacobians_hessians.ipynb + tutorials/ensembling.ipynb + tutorials/per_sample_grads.ipynb + tutorials/neural_tangent_kernels.ipynb + tutorials/aot_autograd_optimizations.ipynb + tutorials/minifier.ipynb diff --git a/functorch/docs/source/notebooks b/functorch/docs/source/notebooks deleted file mode 120000 index d4082256dcfe3..0000000000000 --- a/functorch/docs/source/notebooks +++ /dev/null @@ -1 +0,0 @@ -../../notebooks/ \ No newline at end of file diff --git a/functorch/notebooks/_src/plot_ensembling.py b/functorch/docs/source/tutorials/_src/plot_ensembling.py similarity index 100% rename from functorch/notebooks/_src/plot_ensembling.py rename to functorch/docs/source/tutorials/_src/plot_ensembling.py diff --git a/functorch/notebooks/_src/plot_jacobians_and_hessians.py b/functorch/docs/source/tutorials/_src/plot_jacobians_and_hessians.py similarity index 100% rename from functorch/notebooks/_src/plot_jacobians_and_hessians.py rename to functorch/docs/source/tutorials/_src/plot_jacobians_and_hessians.py diff --git a/functorch/notebooks/_src/plot_per_sample_gradients.py b/functorch/docs/source/tutorials/_src/plot_per_sample_gradients.py similarity index 100% rename from functorch/notebooks/_src/plot_per_sample_gradients.py rename to functorch/docs/source/tutorials/_src/plot_per_sample_gradients.py diff --git a/functorch/notebooks/aot_autograd_optimizations.ipynb b/functorch/docs/source/tutorials/aot_autograd_optimizations.ipynb similarity index 99% rename from functorch/notebooks/aot_autograd_optimizations.ipynb rename to functorch/docs/source/tutorials/aot_autograd_optimizations.ipynb index 9a2db0fa9d1c5..a09793259769b 100644 --- a/functorch/notebooks/aot_autograd_optimizations.ipynb +++ b/functorch/docs/source/tutorials/aot_autograd_optimizations.ipynb @@ -6,7 +6,7 @@ "source": [ "# AOT Autograd - How to use and optimize?\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", diff --git a/functorch/notebooks/ensembling.ipynb b/functorch/docs/source/tutorials/ensembling.ipynb similarity index 99% rename from functorch/notebooks/ensembling.ipynb rename to functorch/docs/source/tutorials/ensembling.ipynb index 1ecc8738b0b5f..e7e4857f1872d 100644 --- a/functorch/notebooks/ensembling.ipynb +++ b/functorch/docs/source/tutorials/ensembling.ipynb @@ -11,7 +11,7 @@ "\n", "This example illustrates how to vectorize model ensembling using vmap.\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", diff --git a/functorch/notebooks/jacobians_hessians.ipynb b/functorch/docs/source/tutorials/jacobians_hessians.ipynb similarity index 99% rename from functorch/notebooks/jacobians_hessians.ipynb rename to functorch/docs/source/tutorials/jacobians_hessians.ipynb index 4acf2ec609ff3..4e2f7d0908afc 100644 --- a/functorch/notebooks/jacobians_hessians.ipynb +++ b/functorch/docs/source/tutorials/jacobians_hessians.ipynb @@ -5,7 +5,7 @@ "source": [ "# Jacobians, Hessians, hvp, vhp, and more: composing functorch transforms\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", diff --git a/functorch/notebooks/minifier.ipynb b/functorch/docs/source/tutorials/minifier.ipynb similarity index 100% rename from functorch/notebooks/minifier.ipynb rename to 
functorch/docs/source/tutorials/minifier.ipynb diff --git a/functorch/notebooks/neural_tangent_kernels.ipynb b/functorch/docs/source/tutorials/neural_tangent_kernels.ipynb similarity index 99% rename from functorch/notebooks/neural_tangent_kernels.ipynb rename to functorch/docs/source/tutorials/neural_tangent_kernels.ipynb index 9d041be909268..0d4704cedf450 100644 --- a/functorch/notebooks/neural_tangent_kernels.ipynb +++ b/functorch/docs/source/tutorials/neural_tangent_kernels.ipynb @@ -7,7 +7,7 @@ "source": [ "# Neural Tangent Kernels\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", diff --git a/functorch/notebooks/per_sample_grads.ipynb b/functorch/docs/source/tutorials/per_sample_grads.ipynb similarity index 99% rename from functorch/notebooks/per_sample_grads.ipynb rename to functorch/docs/source/tutorials/per_sample_grads.ipynb index e2317351f7eb1..5d5fc7206e170 100644 --- a/functorch/notebooks/per_sample_grads.ipynb +++ b/functorch/docs/source/tutorials/per_sample_grads.ipynb @@ -9,7 +9,7 @@ "source": [ "# Per-sample-gradients\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", diff --git a/functorch/notebooks/whirlwind_tour.ipynb b/functorch/docs/source/tutorials/whirlwind_tour.ipynb similarity index 99% rename from functorch/notebooks/whirlwind_tour.ipynb rename to functorch/docs/source/tutorials/whirlwind_tour.ipynb index deae3418966ba..4e0236db40894 100644 --- a/functorch/notebooks/whirlwind_tour.ipynb +++ b/functorch/docs/source/tutorials/whirlwind_tour.ipynb @@ -7,7 +7,7 @@ "source": [ "# Whirlwind Tour\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", diff --git a/pyproject.toml b/pyproject.toml index afc5aba2ccd3c..874c629273d7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -278,7 +278,7 @@ keep-runtime-typing = true "PYI021", # docstring-in-stub "PYI053", # string-or-bytes-too-long ] -"functorch/notebooks/**" = [ +"functorch/docs/source/tutorials/**" = [ "F401", ] "test/export/**" = [ From 6e8f17c58029e5fa6bc222b2445ebbc0cbdc17c7 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Fri, 12 Sep 2025 03:56:18 +0000 Subject: [PATCH 147/693] [RELAND] Always build USE_DISTRIBUTED (#160449) and Make distributed modules importable even when backend not built (#159889) (#162594) Summary: Original: D81957844 and D81957923 Also, https://github.com/pytorch/pytorch/pull/162142 is patched in as well #buildall Test Plan: sandcastle and oss ci Rollback Plan: Reviewed By: H-Huang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162594 Approved by: https://github.com/H-Huang, https://github.com/dcci --- .ci/pytorch/macos-build.sh | 7 +- .ci/pytorch/macos-test.sh | 4 + .ci/wheel/build_wheel.sh | 3 +- BUILD.bazel | 3 +- CMakeLists.txt | 12 +- buckbuild.bzl | 4 +- c10/ovrsource_defs.bzl | 4 +- caffe2/CMakeLists.txt | 144 +++++----- cmake/Dependencies.cmake | 2 +- cmake/Summary.cmake | 12 +- docs/source/conf.py | 7 - test/cpp/dist_autograd/CMakeLists.txt | 2 +- test/distributed/tensor/test_fake.py | 41 +++ test/export/test_export.py | 10 +- test/test_numa_binding.py | 5 +- tools/build_pytorch_libs.py | 3 +- torch/CMakeLists.txt | 50 ++-- torch/_C/_distributed_c10d.pyi | 9 + torch/csrc/Exceptions.h | 2 - torch/csrc/Module.cpp | 10 - torch/csrc/autograd/functions/init.cpp | 4 - torch/csrc/distributed/c10d/HashStore.cpp | 1 - torch/csrc/distributed/c10d/Work.cpp | 2 +- torch/csrc/distributed/c10d/init.cpp | 1 + torch/csrc/inductor/aoti_torch/shim_cpu.cpp | 4 - torch/csrc/jit/python/pybind_utils.h | 6 +- .../csrc/jit/python/python_sugared_value.cpp | 3 +- 
torch/csrc/jit/runtime/interpreter.h | 14 +- torch/csrc/jit/serialization/pickler.h | 2 - torch/csrc/jit/serialization/unpickler.h | 2 - .../standalone/execution_trace_observer.cpp | 9 - torch/csrc/profiler/util.cpp | 6 +- torch/csrc/profiler/util.h | 2 - torch/distributed/_C_stubs.py | 150 ++++++++++ torch/distributed/__init__.py | 258 +++++++++--------- torch/distributed/_dist2.py | 2 +- torch/distributed/_distributed_c10d.py | 245 +++++++++++++++++ torch/distributed/_functional_collectives.py | 12 +- .../_shard/sharded_tensor/reshard.py | 2 +- .../chunk_sharding_spec_ops/embedding_bag.py | 2 +- .../distributed/_symmetric_memory/__init__.py | 22 +- .../_symmetric_memory/_nvshmem_triton.py | 2 +- torch/distributed/_tools/fake_collectives.py | 4 +- .../algorithms/model_averaging/utils.py | 4 - torch/distributed/constants.py | 15 +- torch/distributed/device_mesh.py | 44 +-- torch/distributed/distributed_c10d.py | 70 +++-- torch/distributed/elastic/control_plane.py | 2 +- torch/distributed/nn/functional.py | 4 - torch/distributed/rpc/__init__.py | 2 +- torch/distributed/tensor/_collective_utils.py | 4 +- .../testing/_internal/distributed/fake_pg.py | 2 +- 52 files changed, 778 insertions(+), 458 deletions(-) create mode 100644 test/distributed/tensor/test_fake.py create mode 100644 torch/distributed/_C_stubs.py create mode 100644 torch/distributed/_distributed_c10d.py diff --git a/.ci/pytorch/macos-build.sh b/.ci/pytorch/macos-build.sh index d7447e7d48582..d41c3c08e6288 100755 --- a/.ci/pytorch/macos-build.sh +++ b/.ci/pytorch/macos-build.sh @@ -35,11 +35,10 @@ fi print_cmake_info if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then - # Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls - USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel + USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel else - # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests - # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448 + # NB: we always build with distributed; USE_DISTRIBUTED turns off all + # backends (specifically the gloo backend), so test that this case works too USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64 fi if which sccache > /dev/null; then diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index a859901191e03..79d47da431712 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -13,9 +13,13 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available( fi popd +python -mpip install -r requirements.txt + # enable debug asserts in serialization export TORCH_SERIALIZATION_DEBUG=1 +python -mpip install --no-input -r requirements.txt + setup_test_python() { # The CircleCI worker hostname doesn't resolve to an address. 
# This environment variable makes ProcessGroupGloo default to diff --git a/.ci/wheel/build_wheel.sh b/.ci/wheel/build_wheel.sh index 2d5f4d30b4c82..98b50c0ceeafe 100755 --- a/.ci/wheel/build_wheel.sh +++ b/.ci/wheel/build_wheel.sh @@ -177,7 +177,8 @@ source ~/${desired_python}-build/bin/activate retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt" retry brew install libomp -# For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule +# For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which +# is build as part of tensorpipe submodule export USE_DISTRIBUTED=1 export USE_MKLDNN=OFF diff --git a/BUILD.bazel b/BUILD.bazel index d4202e7a2c1e4..635f39eed2cee 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -22,7 +22,6 @@ COMMON_COPTS = [ "-DHAVE_SHM_UNLINK=1", "-D_FILE_OFFSET_BITS=64", "-DUSE_FBGEMM", - "-DUSE_DISTRIBUTED", "-DAT_PER_OPERATOR_HEADERS", "-DATEN_THREADING=NATIVE", "-DNO_CUDNN_DESTROY_HANDLE", @@ -811,7 +810,7 @@ cc_library( name = "torch_python", srcs = libtorch_python_core_sources + if_cuda(libtorch_python_cuda_sources) - + if_cuda(libtorch_python_distributed_sources) + + libtorch_python_distributed_sources + GENERATED_AUTOGRAD_PYTHON, hdrs = glob([ "torch/csrc/generic/*.cpp", diff --git a/CMakeLists.txt b/CMakeLists.txt index efad5419aaffa..f3e4b28bcff98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,8 +181,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le)") set(CPU_POWER ON) endif() -# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not -# tested and likely won't work without additional changes. +# For non-supported platforms, turn USE_DISTRIBUTED off by default. +# NB: USE_DISTRIBUTED simply disables the backend; distributed code +# still gets built if(NOT LINUX AND NOT WIN32) set(USE_DISTRIBUTED OFF @@ -262,11 +263,11 @@ option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF) option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF) option(USE_NATIVE_ARCH "Use -march=native" OFF) cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF) -option(USE_DISTRIBUTED "Use distributed" ON) +option(USE_DISTRIBUTED "Enable default distributed backends" ON) cmake_dependent_option(USE_NCCL "Use NCCL" ON "USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) cmake_dependent_option(USE_XCCL "Use XCCL" ON - "USE_XPU;UNIX;NOT APPLE" OFF) + "USE_DISTRIBUTED;USE_XPU;UNIX;NOT APPLE" OFF) cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF) cmake_dependent_option(USE_RCCL "Use RCCL" ON "USE_NCCL;NOT WIN32" OFF) cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF) @@ -431,11 +432,10 @@ if(WIN32) PATH_SUFFIXES lib NO_DEFAULT_PATH) if(NOT libuv_tmp_LIBRARY) - set(USE_DISTRIBUTED OFF) set(USE_GLOO OFF) message( WARNING - "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " + "Libuv is not installed in current conda env. Set USE_GLOO to OFF. " "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv." 
) else() diff --git a/buckbuild.bzl b/buckbuild.bzl index e079d98395441..3e3af13f9118a 100644 --- a/buckbuild.bzl +++ b/buckbuild.bzl @@ -156,7 +156,7 @@ ROOT = "//" if IS_OSS else "//xplat/caffe2" # for targets in subfolders ROOT_PATH = "//" if IS_OSS else "//xplat/caffe2/" -C10 = "//c10:c10" if IS_OSS else "//xplat/caffe2/c10:c10" +C10 = "//c10:c10" if IS_OSS else ("//xplat/caffe2/c10:c10_ovrsource" if is_arvr_mode() else "//xplat/caffe2/c10:c10") # a dictionary maps third party library name to fbsource and oss target THIRD_PARTY_LIBS = { @@ -948,6 +948,7 @@ def define_buck_targets( [ ("torch/csrc/api/include", "torch/**/*.h"), ("", "torch/csrc/**/*.h"), + ("", "torch/csrc/**/*.hpp"), ("", "torch/nativert/**/*.h"), ("", "torch/headeronly/**/*.h"), ("", "torch/script.h"), @@ -2033,6 +2034,7 @@ def define_buck_targets( ("", "caffe2/utils/*.h"), ("", "caffe2/core/*.h"), ("", "torch/csrc/*.h"), + ("", "torch/csrc/*.hpp"), ("", "torch/csrc/api/include/torch/*.h"), ("", "torch/csrc/autograd/*.h"), ("", "torch/csrc/autograd/*/*.h"), diff --git a/c10/ovrsource_defs.bzl b/c10/ovrsource_defs.bzl index aafe5a4de8c42..532404f21bbaf 100644 --- a/c10/ovrsource_defs.bzl +++ b/c10/ovrsource_defs.bzl @@ -18,9 +18,9 @@ cuda_supported_platforms = [ def define_c10_ovrsource(name, is_mobile): if is_mobile: - pp_flags = ["-DC10_MOBILE=1"] + pp_flags = ["-DC10_MOBILE=1", "-DC10_USE_GLOG"] else: - pp_flags = [] + pp_flags = ["-DC10_USE_GLOG"] oxx_static_library( name = name, diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 99d4b2cd5aa93..b5d47bb4b5dff 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -540,11 +540,9 @@ if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER) ${TORCH_SRC_DIR}/csrc/utils/byte_order.cpp ) - if(USE_DISTRIBUTED) - append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) - if(NOT WIN32) - append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) - endif() + append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) + if(NOT WIN32) + append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) endif() endif() @@ -573,32 +571,30 @@ if(USE_CUDA) list(APPEND Caffe2_GPU_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) endif() - if(USE_DISTRIBUTED) - append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS) - if(NOT WIN32) - append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) - set_source_files_properties( - ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/utils.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp - PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" - ) - endif() + append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS) + if(NOT WIN32) + append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) + set_source_files_properties( + ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/utils.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp + 
${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp + PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" + ) + endif() - set(ASYNC_MM_FILE "${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/AsyncMM.cu") - # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9 - if(CMAKE_COMPILER_IS_GNUCXX) - set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable") - endif() - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*") - set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a") - endif() + set(ASYNC_MM_FILE "${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/AsyncMM.cu") + # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9 + if(CMAKE_COMPILER_IS_GNUCXX) + set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable") + endif() + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*") + set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a") endif() set_source_files_properties( ${TORCH_ROOT}/aten/src/ATen/cuda/detail/LazyNVRTC.cpp @@ -631,11 +627,9 @@ if(USE_ROCM) list(APPEND Caffe2_HIP_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) endif() - if(USE_DISTRIBUTED) - append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) - if(NOT WIN32) - append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) - endif() + append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) + if(NOT WIN32) + append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) endif() # caffe2_nvrtc's stubs to driver APIs are useful for HIP. 
# See NOTE [ ATen NVRTC Stub and HIP ] @@ -1356,12 +1350,10 @@ if(BUILD_TEST) add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit) add_subdirectory(${TORCH_ROOT}/test/cpp/nativert ${CMAKE_BINARY_DIR}/test_nativert) add_subdirectory(${TORCH_ROOT}/test/inductor ${CMAKE_BINARY_DIR}/test_inductor) - if(USE_DISTRIBUTED) - add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) - if(NOT WIN32) - add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) - add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) - endif() + add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) + if(NOT WIN32) + add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) + add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) endif() if(NOT NO_API) add_subdirectory(${TORCH_ROOT}/test/cpp/api ${CMAKE_BINARY_DIR}/test_api) @@ -1466,47 +1458,41 @@ if(BUILD_LITE_INTERPRETER) endif() endif() - -# Pass USE_DISTRIBUTED to torch_cpu, as some codes in jit/pickler.cpp and -# jit/unpickler.cpp need to be compiled only when USE_DISTRIBUTED is set -if(USE_DISTRIBUTED) - target_compile_definitions(torch_cpu PUBLIC USE_DISTRIBUTED) - if(USE_GLOO AND USE_C10D_GLOO) - target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) - endif() - if(USE_UCC AND USE_C10D_UCC) - target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) - if(USE_CUDA) - target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) - endif() - endif() - if(USE_NCCL AND USE_C10D_NCCL) - if(USE_ROCM) - target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) - else() - target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) - endif() - endif() - if(USE_MPI AND USE_C10D_MPI) - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set_source_files_properties( - "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" - PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) - endif() - target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) - endif() - # Pass USE_RPC in order to reduce use of - # #if defined(USE_DISTRIBUTED) && !defined(_WIN32) - # need to be removed when RPC is supported - if(NOT WIN32) - target_compile_definitions(torch_cpu PUBLIC USE_RPC) +if(USE_GLOO AND USE_C10D_GLOO) + target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) +endif() +if(USE_UCC AND USE_C10D_UCC) + target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) + if(USE_CUDA) + target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) endif() - # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp - # can only be compiled with USE_TENSORPIPE is set. 
- if(USE_TENSORPIPE) - target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) +endif() +if(USE_NCCL AND USE_C10D_NCCL) + if(USE_ROCM) + target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) + else() + target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) endif() endif() +if(USE_MPI AND USE_C10D_MPI) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set_source_files_properties( + "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" + PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) + endif() + target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) +endif() +# Pass USE_RPC in order to reduce use of +# #if defined(USE_DISTRIBUTED) && !defined(_WIN32) +# need to be removed when RPC is supported +if(NOT WIN32) + target_compile_definitions(torch_cpu PUBLIC USE_RPC) +endif() +# Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp +# can only be compiled with USE_TENSORPIPE is set. +if(USE_TENSORPIPE) + target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) +endif() if(NOT INTERN_BUILD_MOBILE) if(${CAFFE2_LINK_LOCAL_PROTOBUF}) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 6ad56d3b9b44e..08ffdaf8cf451 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1134,7 +1134,7 @@ if(USE_CUDA AND CUDA_VERSION VERSION_LESS 13.0) include_directories(SYSTEM ${CUB_INCLUDE_DIRS}) endif() -if(USE_DISTRIBUTED AND USE_TENSORPIPE) +if(USE_TENSORPIPE) if(MSVC) message(WARNING "Tensorpipe cannot be used on Windows.") else() diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index ffd4b5298a890..fb64e99bccf22 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -192,13 +192,11 @@ function(caffe2_print_configuration_summary) message(STATUS " USE_PYTORCH_QNNPACK : ${USE_PYTORCH_QNNPACK}") message(STATUS " USE_XNNPACK : ${USE_XNNPACK}") message(STATUS " USE_DISTRIBUTED : ${USE_DISTRIBUTED}") - if(${USE_DISTRIBUTED}) - message(STATUS " USE_MPI : ${USE_MPI}") - message(STATUS " USE_GLOO : ${USE_GLOO}") - message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}") - message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}") - message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}") - endif() + message(STATUS " USE_MPI : ${USE_MPI}") + message(STATUS " USE_GLOO : ${USE_GLOO}") + message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}") + message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}") + message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}") if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") endif() diff --git a/docs/source/conf.py b/docs/source/conf.py index 44ad4de8115f6..d1504757f9c54 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -3333,13 +3333,6 @@ def coverage_post_process(app, exception): if not isinstance(app.builder, CoverageBuilder): return - if not torch.distributed.is_available(): - raise RuntimeError( - "The coverage tool cannot run with a version " - "of PyTorch that was built with USE_DISTRIBUTED=0 " - "as this module's API changes." 
- ) - # These are all the modules that have "automodule" in an rst file # These modules are the ones for which coverage is checked # Here, we make sure that no module is missing from that list diff --git a/test/cpp/dist_autograd/CMakeLists.txt b/test/cpp/dist_autograd/CMakeLists.txt index 14fd7f7ae9a2b..86a6c924288bb 100644 --- a/test/cpp/dist_autograd/CMakeLists.txt +++ b/test/cpp/dist_autograd/CMakeLists.txt @@ -1,4 +1,4 @@ -if(USE_DISTRIBUTED AND NOT WIN32) +if(NOT WIN32) set(DIST_AUTOGRAD_TEST_DIR "${TORCH_ROOT}/test/cpp/dist_autograd") set(DIST_AUTOGRAD_TEST_SOURCES ${TORCH_ROOT}/test/cpp/common/main.cpp diff --git a/test/distributed/tensor/test_fake.py b/test/distributed/tensor/test_fake.py new file mode 100644 index 0000000000000..099c6e87f5f18 --- /dev/null +++ b/test/distributed/tensor/test_fake.py @@ -0,0 +1,41 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates +# Owner(s): ["oncall: distributed"] + +import torch +from torch._subclasses.fake_tensor import FakeTensorMode +from torch.distributed.tensor import DTensor +from torch.distributed.tensor.placement_types import Shard +from torch.testing._internal.common_utils import run_tests, TestCase +from torch.testing._internal.distributed.fake_pg import FakeStore + + +class TestFakeDTensor(TestCase): + def test_fake_dtensor_operations(self): + # Use FakeTensorMode to handle CUDA tensors without actual CUDA + fake_mode = FakeTensorMode() + world_size = 4 + + fake_store = FakeStore() + torch.distributed.init_process_group( + "fake", store=fake_store, rank=0, world_size=world_size + ) + device_mesh = torch.distributed.device_mesh.init_device_mesh( + "cuda", + (2, world_size // 2), + ) + + # Create fake CUDA tensor using FakeTensorMode + with fake_mode: + x = torch.randn(1, 1, device="cuda") + x = DTensor.from_local(x, device_mesh, [Shard(0), Shard(1)]) + + # Test basic DTensor operations + self.assertIsInstance(x, DTensor) + + # Test sum operation + r = x.sum(1) + self.assertIsInstance(r, DTensor) + + +if __name__ == "__main__": + run_tests() diff --git a/test/export/test_export.py b/test/export/test_export.py index b4596eab95baf..2c466f162a893 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -60,10 +60,7 @@ from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.symbolic_shapes import ShapeEnv from torch.testing import FileCheck -from torch.testing._internal.common_cuda import ( - PLATFORM_SUPPORTS_FLASH_ATTENTION, - xfailIfDistributedNotSupported, -) +from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FLASH_ATTENTION from torch.testing._internal.common_utils import ( find_library_location, IS_FBCODE, @@ -15772,7 +15769,6 @@ def distributed_env(self, world_size): finally: torch.distributed.destroy_process_group() - @xfailIfDistributedNotSupported def test_distributed_all_reduce(self): class Foo(torch.nn.Module): def __init__(self): @@ -15790,7 +15786,6 @@ def forward(self, x): inp = (torch.randn(4, 4),) self.assertTrue(torch.allclose(ep.module()(*inp), m(*inp))) - @xfailIfDistributedNotSupported def test_distributed_all_gather(self): class Foo(torch.nn.Module): def forward(self, x): @@ -15806,7 +15801,6 @@ def forward(self, x): torch.allclose(a, b) for a, b in zip(ep.module()(*inp), m(*inp)) ) - @xfailIfDistributedNotSupported def test_distributed_all_gather_into_tensor(self): class Foo(torch.nn.Module): def forward(self, x): @@ -15820,7 +15814,6 @@ def forward(self, x): inp = (torch.randn(2),) self.assertTrue(torch.allclose(ep.module()(*inp), m(*inp))) - 
@xfailIfDistributedNotSupported @testing.expectedFailureCppRuntime def test_distributed_all_to_all_single(self): class Foo(torch.nn.Module): @@ -15838,7 +15831,6 @@ def forward(self, x): ) self.assertEqual(len(nodes), 1) - @xfailIfDistributedNotSupported @testing.expectedFailureCppRuntime def test_distributed_reduce_scatter_tensor(self): class Foo(torch.nn.Module): diff --git a/test/test_numa_binding.py b/test/test_numa_binding.py index 764156ff9b98a..d38032ba22603 100644 --- a/test/test_numa_binding.py +++ b/test/test_numa_binding.py @@ -7,7 +7,7 @@ from dataclasses import dataclass from multiprocessing.context import SpawnProcess from typing import Any, Optional -from unittest import skipUnless +from unittest import skipIf, skipUnless from unittest.mock import mock_open, patch import torch @@ -22,7 +22,7 @@ AffinityMode, NumaOptions, ) -from torch.testing._internal.common_utils import run_tests, TestCase +from torch.testing._internal.common_utils import IS_MACOS, run_tests, TestCase @dataclass(frozen=True) @@ -680,6 +680,7 @@ def test_core_complex_tiebreak_prefers_lower_cache_key(self) -> None: set(range(0, 2)), ) + @skipIf(IS_MACOS, "sched_getaffinity doesn't exist") def test_binds_to_node_0_if_node_stored_as_minus_one(self) -> None: self._add_mock_hardware( num_sockets=1, diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py index 9d43de80f1298..457b224354fb2 100644 --- a/tools/build_pytorch_libs.py +++ b/tools/build_pytorch_libs.py @@ -88,8 +88,7 @@ def build_pytorch( ) -> None: my_env = _create_build_env() if ( - not check_negative_env_flag("USE_DISTRIBUTED") - and not check_negative_env_flag("USE_CUDA") + not check_negative_env_flag("USE_CUDA") and not check_negative_env_flag("USE_NCCL") and not check_env_flag("USE_SYSTEM_NCCL") ): diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index 866c40ad1c12e..adc9aad4a05c3 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -276,32 +276,30 @@ add_custom_command( WORKING_DIRECTORY "${TORCH_ROOT}" ) -if(USE_DISTRIBUTED) - if(WIN32) - append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS) - else() - append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS) - endif() - # Disable certain warnings for GCC-9.X - if(CMAKE_COMPILER_IS_GNUCXX) - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - endif() - # NCCL is a private dependency of libtorch, but libtorch_python includes - # some private headers of libtorch, which in turn include NCCL. As a hacky - # alternative to making NCCL a public dependency of libtorch, we make it - # a private dependency of libtorch_python as well. - if(USE_NCCL) - list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl) - endif() - # Same for MPI. 
- if(USE_MPI) - list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX) - endif() - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D) +if(WIN32) + append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS) +else() + append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS) endif() +# Disable certain warnings for GCC-9.X +if(CMAKE_COMPILER_IS_GNUCXX) + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") +endif() +# NCCL is a private dependency of libtorch, but libtorch_python includes +# some private headers of libtorch, which in turn include NCCL. As a hacky +# alternative to making NCCL a public dependency of libtorch, we make it +# a private dependency of libtorch_python as well. +if(USE_NCCL) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl) +endif() +# Same for MPI. +if(USE_MPI) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX) +endif() +list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D) if(USE_NCCL AND NOT WIN32) list(APPEND TORCH_PYTHON_SRCS @@ -369,10 +367,6 @@ if(BUILD_LIBTORCHLESS) target_compile_definitions(torch_python PRIVATE USE_C10D_NCCL) endif() - if(USE_DISTRIBUTED) - target_compile_definitions(torch_python PRIVATE USE_DISTRIBUTED) - endif() - if(USE_MPI AND USE_C10D_MPI) target_compile_definitions(torch_python PRIVATE USE_C10D_MPI) endif() diff --git a/torch/_C/_distributed_c10d.pyi b/torch/_C/_distributed_c10d.pyi index ad3d8e3abf245..79e437063b8cb 100644 --- a/torch/_C/_distributed_c10d.pyi +++ b/torch/_C/_distributed_c10d.pyi @@ -851,3 +851,12 @@ class ProcessGroupXCCL(Backend): def _set_process_group(pg: ProcessGroup) -> None: ... def _current_process_group() -> ProcessGroup: ... +def _dump_nccl_trace_json( + includeCollectives: Optional[bool] = ..., + onlyActive: Optional[bool] = ..., +) -> bytes: ... +def _dump_nccl_trace( + includeCollectives: Optional[bool] = ..., + includeStackTraces: Optional[bool] = ..., + onlyActive: Optional[bool] = ..., +) -> bytes: ... 
diff --git a/torch/csrc/Exceptions.h b/torch/csrc/Exceptions.h index 60a7bb644df01..d43d2b02a23ef 100644 --- a/torch/csrc/Exceptions.h +++ b/torch/csrc/Exceptions.h @@ -15,9 +15,7 @@ #include #include -#if defined(USE_DISTRIBUTED) #include -#endif inline void PyErr_SetString(PyObject* type, const std::string& message) { PyErr_SetString(type, message.c_str()); diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index 675a4c4310052..ac2b03d2651cc 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -120,14 +120,10 @@ #endif #endif -#ifdef USE_DISTRIBUTED -#ifdef USE_C10D #include #include #include #include -#endif -#endif #if defined(USE_VALGRIND) #include @@ -552,11 +548,7 @@ static PyObject* THPModule_getBackcompatKeepdimWarn( } static PyObject* THPModule_hasDistributed(PyObject* _unused, PyObject* noargs) { -#ifdef USE_DISTRIBUTED Py_RETURN_TRUE; -#else - Py_RETURN_FALSE; -#endif } static PyObject* THPModule_showConfig(PyObject* module, PyObject* noargs) { @@ -1993,7 +1985,6 @@ PyObject* initModule() { #ifdef USE_XPU THPUtils_addPyMethodDefs(methods, THXPModule_methods()); #endif -#if defined(USE_DISTRIBUTED) && defined(USE_C10D) THPUtils_addPyMethodDefs( methods, torch::distributed::c10d::python_functions()); #ifndef _WIN32 @@ -2003,7 +1994,6 @@ PyObject* initModule() { methods, torch::distributed::autograd::python_functions()); THPUtils_addPyMethodDefs( methods, torch::distributed::rpc::testing::python_functions()); -#endif #endif static struct PyModuleDef torchmodule = { diff --git a/torch/csrc/autograd/functions/init.cpp b/torch/csrc/autograd/functions/init.cpp index 5e19010f9ae3c..05c8901e1f60d 100644 --- a/torch/csrc/autograd/functions/init.cpp +++ b/torch/csrc/autograd/functions/init.cpp @@ -8,9 +8,7 @@ #include #include #include -#ifdef USE_DISTRIBUTED #include -#endif #include #include #include @@ -150,11 +148,9 @@ void THPAutograd_initFunctions() { static PyTypeObject CopyBackwardsClass; addClass(module, CopyBackwardsClass, "CopyBackwards"); -#ifdef USE_DISTRIBUTED static PyTypeObject SendRpcBackwardClass; addClass( module, SendRpcBackwardClass, "SendRpcBackward"); -#endif static PyTypeObject CopySlicesClass; addClass(module, CopySlicesClass, "CopySlices"); diff --git a/torch/csrc/distributed/c10d/HashStore.cpp b/torch/csrc/distributed/c10d/HashStore.cpp index 15befd9ec34e2..1055afc4847d0 100644 --- a/torch/csrc/distributed/c10d/HashStore.cpp +++ b/torch/csrc/distributed/c10d/HashStore.cpp @@ -1,6 +1,5 @@ #include -#include #include #include diff --git a/torch/csrc/distributed/c10d/Work.cpp b/torch/csrc/distributed/c10d/Work.cpp index cdec9185ce537..2c1ee42727d8a 100644 --- a/torch/csrc/distributed/c10d/Work.cpp +++ b/torch/csrc/distributed/c10d/Work.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index 7e79fef8392f6..128fab6593b37 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -46,6 +46,7 @@ #include #include #include + #include #include diff --git a/torch/csrc/inductor/aoti_torch/shim_cpu.cpp b/torch/csrc/inductor/aoti_torch/shim_cpu.cpp index b1c864bf3fbba..a610685fe9557 100644 --- a/torch/csrc/inductor/aoti_torch/shim_cpu.cpp +++ b/torch/csrc/inductor/aoti_torch/shim_cpu.cpp @@ -1,7 +1,5 @@ -#ifdef USE_DISTRIBUTED #include -#endif #include #include @@ -533,7 +531,6 @@ AOTITorchError aoti_torch_cpu__weight_int4pack_mm_cpu_tensor( }); } -#ifdef USE_DISTRIBUTED AOTITorchError 
aoti_torch_cpu__c10d_functional_all_reduce_( AtenTensorHandle inp, const char* reduce_op, @@ -566,4 +563,3 @@ AOTITorchError aoti_torch_cpu__c10d_functional_wait_tensor( *ret0 = new_tensor_handle(std::move(tmp_result)); }); } -#endif diff --git a/torch/csrc/jit/python/pybind_utils.h b/torch/csrc/jit/python/pybind_utils.h index 5ae84e3e0c68b..2c0c1ea4b9cf2 100644 --- a/torch/csrc/jit/python/pybind_utils.h +++ b/torch/csrc/jit/python/pybind_utils.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -24,10 +26,6 @@ #include #include #include -#ifdef USE_DISTRIBUTED -#include -#include -#endif #include #include diff --git a/torch/csrc/jit/python/python_sugared_value.cpp b/torch/csrc/jit/python/python_sugared_value.cpp index 8b16e089aa50e..808fe7d3605ba 100644 --- a/torch/csrc/jit/python/python_sugared_value.cpp +++ b/torch/csrc/jit/python/python_sugared_value.cpp @@ -1225,7 +1225,7 @@ std::shared_ptr toSugaredValue( } else if (obj.ptr() == py::module::import("torch").attr("_check").ptr()) { return std::make_shared(); #ifdef USE_RPC - // RPC module is only available when build flag "USE_DISTRIBUTED" is on. + // This is not defined on WINDOWS } else if ( isRpcAvailable && obj.ptr() == @@ -1238,7 +1238,6 @@ std::shared_ptr toSugaredValue( return SpecialFormValue::create(prim::rpc_sync); } else if ( isRpcAvailable && - // RPC module is only available when build flag "USE_DISTRIBUTED" is on. obj.ptr() == py::module::import("torch.distributed.rpc").attr("remote").ptr()) { return SpecialFormValue::create(prim::rpc_remote); diff --git a/torch/csrc/jit/runtime/interpreter.h b/torch/csrc/jit/runtime/interpreter.h index 6ae9f52a0cda2..be582cfb7cdd8 100644 --- a/torch/csrc/jit/runtime/interpreter.h +++ b/torch/csrc/jit/runtime/interpreter.h @@ -128,13 +128,8 @@ struct InterpreterContinuation { std::optional tls_state = std::nullopt) : state(std::move(state_)), stack(std::move(stack_)), - tls_state_(std::move(tls_state)) -#ifdef USE_DISTRIBUTED - , - dist_autograd_context_id_(dist_autograd_context_id) -#endif - { - } + tls_state_(std::move(tls_state)), + dist_autograd_context_id_(dist_autograd_context_id) {} void operator()(); @@ -142,9 +137,10 @@ struct InterpreterContinuation { InterpreterState state; Stack stack; std::optional tls_state_ = std::nullopt; -#ifdef USE_DISTRIBUTED - int64_t dist_autograd_context_id_; +#ifndef USE_RPC + [[maybe_unused]] #endif + int64_t dist_autograd_context_id_; }; // what is the tensors type, including state from the current execution context diff --git a/torch/csrc/jit/serialization/pickler.h b/torch/csrc/jit/serialization/pickler.h index 526c840bc10e8..e3379f4de65ac 100644 --- a/torch/csrc/jit/serialization/pickler.h +++ b/torch/csrc/jit/serialization/pickler.h @@ -79,9 +79,7 @@ class TORCH_API Pickler { void pushTuple(const IValue& ivalue); void pushString(const std::string& string); void pushDevice(const IValue& ivalue); -#ifdef USE_DISTRIBUTED void pushRRef(const IValue& ivalue); -#endif // unmemoized version void pushStringImpl(const std::string& string); void pushStorageOfTensor(const at::Tensor& tensor); diff --git a/torch/csrc/jit/serialization/unpickler.h b/torch/csrc/jit/serialization/unpickler.h index 702a1d8816e7f..208cf554ad2bb 100644 --- a/torch/csrc/jit/serialization/unpickler.h +++ b/torch/csrc/jit/serialization/unpickler.h @@ -140,9 +140,7 @@ class TORCH_API Unpickler { void rebuildParameter(); void rebuildTensorFromTypeV2(); void rebuildSparseTensor(); -#ifdef USE_DISTRIBUTED void rebuildRRef(); -#endif 
PickleOpCode readInstruction(); PickleOpCode readOpCode() { return static_cast(read()); diff --git a/torch/csrc/profiler/standalone/execution_trace_observer.cpp b/torch/csrc/profiler/standalone/execution_trace_observer.cpp index 1c88e80d4021c..e46c141cd3f4d 100644 --- a/torch/csrc/profiler/standalone/execution_trace_observer.cpp +++ b/torch/csrc/profiler/standalone/execution_trace_observer.cpp @@ -30,15 +30,12 @@ #include #include -#ifdef USE_DISTRIBUTED #include -#endif // USE_DISTRIBUTED using namespace at; // Collective property attributes // https://github.com/pytorch/pytorch/issues/124674 -#ifdef USE_DISTRIBUTED constexpr auto kETCommsName = "collective_name"; constexpr auto kETInMsgNelems = "in_msg_nelems"; constexpr auto kETOutMsgNelems = "out_msg_nelems"; @@ -49,7 +46,6 @@ constexpr auto kETGlobalRankStride = "global_rank_stride"; constexpr auto kETGroupSize = "pg_size"; constexpr auto kETProcessGroupName = "pg_name"; constexpr auto kETProcessGroupDesc = "pg_desc"; -#endif // USE_DISTRIBUTED namespace torch::profiler::impl { @@ -269,7 +265,6 @@ static std::ofstream openOutputFile(const std::string& name) { return stream; } -#ifdef USE_DISTRIBUTED static std::string getAttrJson( const std::string& name, const std::string& type, @@ -282,7 +277,6 @@ static std::string getAttrJson( type, value); } -#endif static void writeJsonNode( std::ofstream& out, @@ -660,7 +654,6 @@ static void handleKernelBackendInfo( inline std::string getCommsNodeAttrs(const RecordFunction& fn) { // NOLINT std::vector attrs; -#ifdef USE_DISTRIBUTED // We rely on paramcommsdebug object that is available in thread local info auto debugInfo = dynamic_cast( c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PARAM_COMMS_INFO)); @@ -704,8 +697,6 @@ inline std::string getCommsNodeAttrs(const RecordFunction& fn) { // NOLINT addAttr(kGroupSize, kETGroupSize, "uint64"); -#endif // USE_DISTRIBUTED - // XXX consider using as string stream? return attrs.empty() ? 
"" : fmt::format(", {}", fmt::join(attrs, ", ")); } diff --git a/torch/csrc/profiler/util.cpp b/torch/csrc/profiler/util.cpp index 0b2979e6fb7ea..e97699a99fd1c 100644 --- a/torch/csrc/profiler/util.cpp +++ b/torch/csrc/profiler/util.cpp @@ -11,9 +11,7 @@ #ifdef USE_KINETO #include #endif -#ifdef USE_DISTRIBUTED #include -#endif // USE_DISTRIBUTED namespace torch::profiler::impl { @@ -455,7 +453,7 @@ std::unordered_map saveNcclMeta( // @lint-ignore CLANGTIDY const SaveNcclMetaConfig& config) { std::unordered_map map; -#ifdef USE_DISTRIBUTED +#if !defined(BUILD_LITE_INTERPRETER) && !defined(C10_MOBILE) auto debugInfo = dynamic_cast( c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PARAM_COMMS_INFO)); @@ -565,7 +563,7 @@ std::unordered_map saveNcclMeta( } } } -#endif // USE_DISTRIBUTED +#endif // !defined(BUILD_LITE_INTERPRETER) && !defined(C10_MOBILE) return map; } diff --git a/torch/csrc/profiler/util.h b/torch/csrc/profiler/util.h index f2ae57fa0e591..dcb4b866a2de3 100644 --- a/torch/csrc/profiler/util.h +++ b/torch/csrc/profiler/util.h @@ -185,7 +185,6 @@ struct HashCombine { } }; -#ifdef USE_DISTRIBUTED constexpr auto kCommsName = "Collective name"; constexpr auto kDtype = "dtype"; constexpr auto kInMsgNelems = "In msg nelems"; @@ -203,6 +202,5 @@ constexpr auto kP2pSrc = "Src Rank"; constexpr auto kP2pDst = "Dst Rank"; constexpr auto kInTensorsStart = "Input Tensors start"; constexpr auto kOutTensorsStart = "Output Tensors start"; -#endif // USE_DISTRIBUTED } // namespace torch::profiler::impl diff --git a/torch/distributed/_C_stubs.py b/torch/distributed/_C_stubs.py new file mode 100644 index 0000000000000..b241006372b6a --- /dev/null +++ b/torch/distributed/_C_stubs.py @@ -0,0 +1,150 @@ +# mypy: allow-untyped-defs +""" +Python stubs for backend-specific distributed components. + +Since _C._distributed_c10d always exists now, this module only provides +stubs for backend-specific functionality that may not be available in all builds +(e.g., NCCL, UCC, MPI, Gloo, etc.). 
+""" + +from __future__ import annotations + +from typing import Optional, TYPE_CHECKING + +from torch._C._distributed_c10d import Store + + +if TYPE_CHECKING: + from datetime import timedelta + +import torch + + +# Store classes +class HashStore(Store): + """Stub HashStore for builds without this functionality.""" + + def __init__(self, *args, **kwargs): + self._data = {} + + def set(self, key: str, value: str): + self._data[key] = value + + def get(self, key: str) -> bytes: + return self._data.get(key, "").encode() + + +# Backend-specific process group stubs +class ProcessGroupMPI: + """Stub ProcessGroupMPI for non-MPI builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class ProcessGroupNCCL: + """Stub ProcessGroupNCCL for non-NCCL builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class ProcessGroupGloo: + """Stub ProcessGroupGloo for non-Gloo builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class ProcessGroupUCC: + """Stub ProcessGroupUCC for non-UCC builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class ProcessGroupXCCL: + """Stub ProcessGroupXCCL for non-XCCL builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class _ProcessGroupWrapper: + """Stub _ProcessGroupWrapper for non-Gloo builds.""" + + def __init__(self, process_group, *args, **kwargs): + self._process_group = process_group + + def __getattr__(self, name): + return getattr(self._process_group, name) + + +# NCCL-specific function stubs +_DEFAULT_PG_NCCL_TIMEOUT: Optional[timedelta] = None + + +def _hash_tensors(tensors): + """Stub function to hash tensors - returns dummy hash.""" + return 0 + + +def _dump_nccl_trace_json( + includeCollectives: Optional[bool] = None, onlyActive: Optional[bool] = None +) -> bytes: + """Stub function that returns empty JSON trace.""" + return b"{}" + + +def _dump_nccl_trace( + includeCollectives: Optional[bool] = None, + includeStackTraces: Optional[bool] = None, + onlyActive: Optional[bool] = None, +) -> bytes: + """Stub function that returns empty pickle trace.""" + return b"" + + +# NVSHMEM/SymmetricMemory stubs +def _is_nvshmem_available() -> bool: + """Stub function that returns False indicating NVSHMEM is not available.""" + return False + + +def _nvshmemx_cumodule_init(module: int) -> None: + """Stub function for NVSHMEM CU module initialization.""" + + +class _SymmetricMemory: + """Stub _SymmetricMemory class for builds without this functionality.""" + + def __init__(self, *args, **kwargs): + pass + + @classmethod + def empty_strided_p2p(cls, size, stride, dtype, device, group_name=None): + """Stub that returns a regular tensor.""" + return torch.empty(size, dtype=dtype, device=device) + + @classmethod + def rendezvous(cls, tensor, group_name=None): + """Stub that returns None.""" + return None + + @classmethod + def set_group_info(cls, *args, **kwargs): + """Stub that does nothing.""" + + @classmethod + def set_backend(cls, name): + """Stub that does nothing.""" + + @classmethod + def get_backend(cls, device): + """Stub that returns None.""" + return None + + @classmethod + def has_multicast_support(cls, device_type, device_index): + """Stub that returns False.""" + return False diff --git a/torch/distributed/__init__.py b/torch/distributed/__init__.py index 38e2fdbee803a..836b00c51c3a4 100644 --- a/torch/distributed/__init__.py +++ b/torch/distributed/__init__.py @@ -14,16 +14,10 @@ def is_available() -> bool: """ - Return ``True`` if the distributed package is available. 
- - Otherwise, - ``torch.distributed`` does not expose any other APIs. Currently, - ``torch.distributed`` is available on Linux, MacOS and Windows. Set - ``USE_DISTRIBUTED=1`` to enable it when building PyTorch from source. - Currently, the default value is ``USE_DISTRIBUTED=1`` for Linux and Windows, - ``USE_DISTRIBUTED=0`` for MacOS. + Always returns ``True``. Note that even if distributed is available, + there may not necessarily be any usable backends. """ - return hasattr(torch._C, "_c10d_init") + return True if is_available() and not torch._C._c10d_init(): @@ -36,132 +30,124 @@ def is_available() -> bool: DistStoreError = torch._C._DistStoreError QueueEmptyError = torch._C._DistQueueEmptyError -if is_available(): - from torch._C._distributed_c10d import ( - _broadcast_coalesced, - _compute_bucket_assignment_by_size, - _ControlCollectives, - _DEFAULT_FIRST_BUCKET_BYTES, - _make_nccl_premul_sum, - _register_builtin_comm_hook, - _register_comm_hook, - _StoreCollectives, - _test_python_store, - _verify_params_across_processes, - Backend as _Backend, - BuiltinCommHookType, - DebugLevel, - FileStore, - get_debug_level, - GradBucket, - Logger, - PrefixStore, - ProcessGroup as ProcessGroup, - Reducer, - set_debug_level, - set_debug_level_from_env, - Store, - TCPStore, - Work as _Work, - ) - - class _DistributedPdb(pdb.Pdb): - """ - Supports using PDB from inside a multiprocessing child process. - - Usage: - _DistributedPdb().set_trace() - """ - - def interaction(self, *args, **kwargs): - _stdin = sys.stdin - try: - sys.stdin = open("/dev/stdin") - pdb.Pdb.interaction(self, *args, **kwargs) - finally: - sys.stdin = _stdin - - _breakpoint_cache: dict[int, typing.Any] = {} - - def breakpoint(rank: int = 0, skip: int = 0, timeout_s=3600): - """ - Set a breakpoint, but only on a single rank. All other ranks will wait for you to be - done with the breakpoint before continuing. - - Args: - rank (int): Which rank to break on. Default: ``0`` - skip (int): Skip the first ``skip`` calls to this breakpoint. Default: ``0``. - """ - if skip > 0: - key = hash(str(traceback.format_exc())) - counter = _breakpoint_cache.get(key, 0) + 1 - _breakpoint_cache[key] = counter - if counter <= skip: - log.warning("Skip the breakpoint, counter=%d", counter) - return - - # avoid having the default timeout (if short) interrupt your debug session - if timeout_s is not None: - for group in torch.distributed.distributed_c10d._pg_map: - torch.distributed.distributed_c10d._set_pg_timeout( - timedelta(seconds=timeout_s), group - ) - - if get_rank() == rank: - pdb = _DistributedPdb() - pdb.message( - "\n!!! ATTENTION !!!\n\n" - f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n" - ) - pdb.set_trace() - # If Meta/Python keys are in the TLS, we want to make sure that we ignore them - # and hit the (default) CPU/CUDA implementation of barrier. 
- meta_in_tls = torch._C._meta_in_tls_dispatch_include() - guard = torch._C._DisableTorchDispatch() # type: ignore[attr-defined] - torch._C._set_meta_in_tls_dispatch_include(False) +from torch.distributed._distributed_c10d import ( + _broadcast_coalesced, + _compute_bucket_assignment_by_size, + _ControlCollectives, + _DEFAULT_FIRST_BUCKET_BYTES, + _make_nccl_premul_sum, + _register_builtin_comm_hook, + _register_comm_hook, + _StoreCollectives, + _test_python_store, + _verify_params_across_processes, + Backend as _Backend, + BuiltinCommHookType, + DebugLevel, + FileStore, + get_debug_level, + GradBucket, + Logger, + PrefixStore, + ProcessGroup as ProcessGroup, + Reducer, + set_debug_level, + set_debug_level_from_env, + Store, + TCPStore, + Work as _Work, +) + + +class _DistributedPdb(pdb.Pdb): + """ + Supports using PDB from inside a multiprocessing child process. + + Usage: + _DistributedPdb().set_trace() + """ + + def interaction(self, *args, **kwargs): + _stdin = sys.stdin try: - barrier() + sys.stdin = open("/dev/stdin") + pdb.Pdb.interaction(self, *args, **kwargs) finally: - torch._C._set_meta_in_tls_dispatch_include(meta_in_tls) - del guard - - if sys.platform != "win32": - from torch._C._distributed_c10d import HashStore - - from .device_mesh import DeviceMesh, init_device_mesh - - # Variables prefixed with underscore are not auto imported - # See the comment in `distributed_c10d.py` above `_backend` on why we expose - # this. - from .distributed_c10d import * # noqa: F403 - from .distributed_c10d import ( - _all_gather_base, - _coalescing_manager, - _CoalescingManager, - _create_process_group_wrapper, - _get_process_group_name, - _rank_not_in_group, - _reduce_scatter_base, - _time_estimator, - get_node_local_rank, - ) - from .remote_device import _remote_device - from .rendezvous import ( - _create_store_from_options, - register_rendezvous_handler, - rendezvous, - ) - - set_debug_level_from_env() - -else: - # This stub is sufficient to get - # python test/test_public_bindings.py -k test_correct_module_names - # working even when USE_DISTRIBUTED=0. Feel free to add more - # stubs as necessary. - # We cannot define stubs directly because they confuse pyre - - class _ProcessGroupStub: - pass - - sys.modules["torch.distributed"].ProcessGroup = _ProcessGroupStub # type: ignore[attr-defined] + sys.stdin = _stdin + + +_breakpoint_cache: dict[int, typing.Any] = {} + + +def breakpoint(rank: int = 0, skip: int = 0, timeout_s=3600): + """ + Set a breakpoint, but only on a single rank. All other ranks will wait for you to be + done with the breakpoint before continuing. + + Args: + rank (int): Which rank to break on. Default: ``0`` + skip (int): Skip the first ``skip`` calls to this breakpoint. Default: ``0``. + """ + if skip > 0: + key = hash(str(traceback.format_exc())) + counter = _breakpoint_cache.get(key, 0) + 1 + _breakpoint_cache[key] = counter + if counter <= skip: + log.warning("Skip the breakpoint, counter=%d", counter) + return + + # avoid having the default timeout (if short) interrupt your debug session + if timeout_s is not None: + for group in torch.distributed.distributed_c10d._pg_map: + torch.distributed.distributed_c10d._set_pg_timeout( + timedelta(seconds=timeout_s), group + ) + + if get_rank() == rank: + pdb = _DistributedPdb() + pdb.message( + "\n!!! 
ATTENTION !!!\n\n" + f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n" + ) + pdb.set_trace() + # If Meta/Python keys are in the TLS, we want to make sure that we ignore them + # and hit the (default) CPU/CUDA implementation of barrier. + meta_in_tls = torch._C._meta_in_tls_dispatch_include() + guard = torch._C._DisableTorchDispatch() # type: ignore[attr-defined] + torch._C._set_meta_in_tls_dispatch_include(False) + try: + barrier() + finally: + torch._C._set_meta_in_tls_dispatch_include(meta_in_tls) + del guard + + +if sys.platform != "win32": + from torch.distributed._distributed_c10d import HashStore + +from .device_mesh import DeviceMesh, init_device_mesh + +# Variables prefixed with underscore are not auto imported +# See the comment in `distributed_c10d.py` above `_backend` on why we expose +# this. +from .distributed_c10d import * # noqa: F403 +from .distributed_c10d import ( + _all_gather_base, + _coalescing_manager, + _CoalescingManager, + _create_process_group_wrapper, + _get_process_group_name, + _rank_not_in_group, + _reduce_scatter_base, + _time_estimator, + get_node_local_rank, +) +from .remote_device import _remote_device +from .rendezvous import ( + _create_store_from_options, + register_rendezvous_handler, + rendezvous, +) + + +set_debug_level_from_env() diff --git a/torch/distributed/_dist2.py b/torch/distributed/_dist2.py index ce5cb8d7e0cc3..1c27bf55d6834 100644 --- a/torch/distributed/_dist2.py +++ b/torch/distributed/_dist2.py @@ -10,7 +10,7 @@ from typing import Protocol, Union import torch -from torch._C._distributed_c10d import ( +from torch.distributed._distributed_c10d import ( _current_process_group, _set_process_group, ProcessGroup, diff --git a/torch/distributed/_distributed_c10d.py b/torch/distributed/_distributed_c10d.py new file mode 100644 index 0000000000000..beb7830edc1da --- /dev/null +++ b/torch/distributed/_distributed_c10d.py @@ -0,0 +1,245 @@ +# mypy: disable-error-code="assignment" +# noqa: F401 +""" +Centralized module for importing and re-exporting torch._C._distributed_c10d components. + +IMPORTANT PATTERN: +Never access torch._C._distributed_c10d directly in code. Always import from and use +torch.distributed._distributed_c10d which is guaranteed to have all functions available. 
+ +Example: + # WRONG: torch._C._distributed_c10d._set_global_rank(rank) + # RIGHT: + from torch.distributed._distributed_c10d import _set_global_rank + _set_global_rank(rank) +""" + +from typing import TYPE_CHECKING + +# Import all core distributed components from the C extension +# NB: This list has to be spelled out because the _C module doesn't have __all__ +from torch._C._distributed_c10d import ( + _allow_inflight_collective_as_graph_input, + _broadcast_coalesced, + _compute_bucket_assignment_by_size, + _ControlCollectives, + _current_process_group, + _DEFAULT_FIRST_BUCKET_BYTES, + _DEFAULT_PG_TIMEOUT, + _DistributedBackendOptions, + _make_nccl_premul_sum, + _register_builtin_comm_hook, + _register_comm_hook, + _register_process_group, + _register_work, + _resolve_process_group, + _set_allow_inflight_collective_as_graph_input, + _set_global_rank, + _set_process_group, + _StoreCollectives, + _test_python_store, + _unregister_all_process_groups, + _unregister_process_group, + _verify_params_across_processes, + _WorkerServer, + AllgatherOptions, + AllreduceCoalescedOptions, + AllreduceOptions, + AllToAllOptions, + Backend, + BarrierOptions, + BroadcastOptions, + BuiltinCommHookType, + DebugLevel, + FakeProcessGroup, + FakeWork, + FileStore, + GatherOptions, + get_debug_level, + GradBucket, + Logger, + PrefixStore, + ProcessGroup, + ReduceOp, + ReduceOptions, + Reducer, + ReduceScatterOptions, + ScatterOptions, + set_debug_level, + set_debug_level_from_env, + Store, + TCPStore, + Work, +) + + +# Backend-specific components that may not be available +_MPI_AVAILABLE = False +_NCCL_AVAILABLE = False +_GLOO_AVAILABLE = False +_UCC_AVAILABLE = False +_XCCL_AVAILABLE = False + +# HashStore +try: + from torch._C._distributed_c10d import HashStore +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import HashStore + +# NVSHMEM/SymmetricMemory components + +# There are multiple backends for SymmetricMemory, as a result, +# _SymmetricMemory should not be imported together with NVSHMEM related modules. 
+try: + from torch._C._distributed_c10d import _SymmetricMemory +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import _SymmetricMemory + +try: + from torch._C._distributed_c10d import ( + _is_nvshmem_available, + _nvshmemx_cumodule_init, + ) +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ( + _is_nvshmem_available, + _nvshmemx_cumodule_init, + ) + +# MPI backend +try: + from torch._C._distributed_c10d import ProcessGroupMPI + + _MPI_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ProcessGroupMPI + +# NCCL backend +try: + from torch._C._distributed_c10d import ( + _DEFAULT_PG_NCCL_TIMEOUT, + _dump_nccl_trace, + _dump_nccl_trace_json, + _hash_tensors, + ProcessGroupNCCL, + ) + + _NCCL_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ( + _DEFAULT_PG_NCCL_TIMEOUT, + _dump_nccl_trace, + _dump_nccl_trace_json, + _hash_tensors, + ProcessGroupNCCL, + ) + +# Gloo backend +try: + from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo + + _GLOO_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import _ProcessGroupWrapper, ProcessGroupGloo + +# UCC backend +try: + from torch._C._distributed_c10d import ProcessGroupUCC + + _UCC_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ProcessGroupUCC + +# XCCL backend +try: + from torch._C._distributed_c10d import ProcessGroupXCCL + + _XCCL_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ProcessGroupXCCL + +# Provide backwards compatibility by making all symbols available at module level +__all__ = [ + # Basic components + "_broadcast_coalesced", + "_compute_bucket_assignment_by_size", + "_ControlCollectives", + "_DEFAULT_FIRST_BUCKET_BYTES", + "_DEFAULT_PG_TIMEOUT", + "_DEFAULT_PG_NCCL_TIMEOUT", + "_make_nccl_premul_sum", + "_register_builtin_comm_hook", + "_register_comm_hook", + "_StoreCollectives", + "_test_python_store", + "_verify_params_across_processes", + "_allow_inflight_collective_as_graph_input", + "_register_work", + "_set_allow_inflight_collective_as_graph_input", + "_is_nvshmem_available", + "_nvshmemx_cumodule_init", + "_SymmetricMemory", + "_hash_tensors", + "_set_global_rank", + "_dump_nccl_trace", + "_dump_nccl_trace_json", + "Backend", + "BuiltinCommHookType", + "DebugLevel", + "FakeProcessGroup", + "FileStore", + "get_debug_level", + "GradBucket", + "HashStore", + "Logger", + "PrefixStore", + "ProcessGroup", + "Reducer", + "ReduceOp", + "set_debug_level", + "set_debug_level_from_env", + "Store", + "TCPStore", + "Work", + "FakeWork", + # Additional distributed_c10d components + "_DistributedBackendOptions", + "_register_process_group", + "_resolve_process_group", + "_unregister_all_process_groups", + "_unregister_process_group", + "_current_process_group", + "_set_process_group", + "_WorkerServer", + "AllgatherOptions", + "AllreduceCoalescedOptions", + "AllreduceOptions", + "AllToAllOptions", + "BarrierOptions", + "BroadcastOptions", + "GatherOptions", + "ReduceOptions", + "ReduceScatterOptions", + "ScatterOptions", + # Process group implementations + "ProcessGroupMPI", + "ProcessGroupNCCL", + "ProcessGroupGloo", + "ProcessGroupUCC", + "ProcessGroupXCCL", + "_ProcessGroupWrapper", + # Availability flags + "_MPI_AVAILABLE", + "_NCCL_AVAILABLE", + "_GLOO_AVAILABLE", + "_UCC_AVAILABLE", + 
"_XCCL_AVAILABLE", +] diff --git a/torch/distributed/_functional_collectives.py b/torch/distributed/_functional_collectives.py index c893794fc3011..95feb6cd79714 100644 --- a/torch/distributed/_functional_collectives.py +++ b/torch/distributed/_functional_collectives.py @@ -7,6 +7,10 @@ import torch import torch.distributed as dist import torch.distributed.distributed_c10d as c10d +from torch.distributed._distributed_c10d import ( + _allow_inflight_collective_as_graph_input, + _set_allow_inflight_collective_as_graph_input, +) from torch.distributed.device_mesh import DeviceMesh from torch.fx.experimental.proxy_tensor import get_proxy_mode @@ -858,15 +862,13 @@ def all_reduce_wait_compiled(y): will be registered in the work registry, and the wait_tensor() in compiled region called on the output tensor of the collective will wait on the correct work object. """ - previous = torch._C._distributed_c10d._allow_inflight_collective_as_graph_input() + previous = _allow_inflight_collective_as_graph_input() try: - torch._C._distributed_c10d._set_allow_inflight_collective_as_graph_input(value) + _set_allow_inflight_collective_as_graph_input(value) yield finally: - torch._C._distributed_c10d._set_allow_inflight_collective_as_graph_input( - previous - ) + _set_allow_inflight_collective_as_graph_input(previous) def _make_all_gather_out_tensor(input, group_size): diff --git a/torch/distributed/_shard/sharded_tensor/reshard.py b/torch/distributed/_shard/sharded_tensor/reshard.py index daef9c3586184..2bc3d65e5c8cb 100644 --- a/torch/distributed/_shard/sharded_tensor/reshard.py +++ b/torch/distributed/_shard/sharded_tensor/reshard.py @@ -4,7 +4,7 @@ import torch import torch.distributed as dist import torch.distributed._shard.sharding_spec as shard_spec -from torch._C._distributed_c10d import ProcessGroup +from torch.distributed._distributed_c10d import ProcessGroup from torch.distributed._shard.metadata import ShardMetadata from torch.distributed._shard.sharding_spec._internals import ( get_chunked_dim_size, diff --git a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py index 61808d0adf62a..f02563619d2fa 100644 --- a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py +++ b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py @@ -4,7 +4,7 @@ import torch import torch.distributed as dist -from torch._C._distributed_c10d import ReduceOp +from torch.distributed._distributed_c10d import ReduceOp from torch.distributed._shard.sharded_tensor import ShardedTensor from torch.distributed._shard.sharding_spec import ChunkShardingSpec from torch.distributed._shard.sharding_spec.api import custom_sharding_spec_op diff --git a/torch/distributed/_symmetric_memory/__init__.py b/torch/distributed/_symmetric_memory/__init__.py index 43c2959fdd8d1..8154cd9809139 100644 --- a/torch/distributed/_symmetric_memory/__init__.py +++ b/torch/distributed/_symmetric_memory/__init__.py @@ -15,7 +15,12 @@ import torch.distributed._functional_collectives as funcol import torch.distributed.distributed_c10d as c10d from torch._C._autograd import DeviceType -from torch._C._distributed_c10d import _SymmetricMemory, Work as _Work +from torch.distributed._distributed_c10d import ( + _register_work, + _SymmetricMemory, + ProcessGroup, + Work as _Work, +) _group_name_to_store: dict[str, c10d.Store] = {} @@ -1488,7 +1493,7 @@ def _low_contention_all_gather( src_buf = 
symm_mem.get_buffer(remote_rank, tensor.shape, tensor.dtype) chunks[remote_rank].copy_(src_buf) symm_mem.barrier() - torch._C._distributed_c10d._register_work(output, Work()) + _register_work(output, Work()) return output @@ -1536,7 +1541,7 @@ def _low_contention_reduce_scatter_with_symm_mem_input( ret = ret.mean(dim=0) else: raise ValueError(f"reduce_op ({reduce_op}) is not supported") - torch._C._distributed_c10d._register_work(ret, Work()) + _register_work(ret, Work()) return ret @@ -1571,7 +1576,7 @@ def _low_contention_reduce_scatter_with_workspace( ret = ret.mean(dim=0) else: raise ValueError(f"reduce_op ({reduce_op}) is not supported") - torch._C._distributed_c10d._register_work(ret, Work()) + _register_work(ret, Work()) return ret @@ -1649,7 +1654,6 @@ def _all_to_all_vdev_2d_offset_meta( if TYPE_CHECKING: - from torch._C._distributed_c10d import ProcessGroup from torch.types import _device, _dtype, _int @@ -1727,8 +1731,6 @@ def rendezvous( group (Union[str, :class:`torch.distributed.ProcessGroup`]): The group identifying the participating processes. This can be either a group name or a process group object. """ - from torch._C._distributed_c10d import ProcessGroup - if isinstance(group, str): group_name = group elif isinstance(group, ProcessGroup): @@ -1746,11 +1748,7 @@ def is_nvshmem_available() -> bool: Check if NVSHMEM is available in current build and on current system. """ - try: - from torch._C._distributed_c10d import _is_nvshmem_available - except ImportError: - # Not all builds have NVSHMEM support. - return False + from torch.distributed._distributed_c10d import _is_nvshmem_available # Check if NVSHMEM is available on current system. return _is_nvshmem_available() diff --git a/torch/distributed/_symmetric_memory/_nvshmem_triton.py b/torch/distributed/_symmetric_memory/_nvshmem_triton.py index c543fdffc1c76..7b7828227d7d1 100644 --- a/torch/distributed/_symmetric_memory/_nvshmem_triton.py +++ b/torch/distributed/_symmetric_memory/_nvshmem_triton.py @@ -75,7 +75,7 @@ def enable_triton(lib_dir: Optional[str] = None) -> dict[str, str]: """ import triton - from torch._C._distributed_c10d import _nvshmemx_cumodule_init + from torch.distributed._distributed_c10d import _nvshmemx_cumodule_init if lib_dir is not None: lib_path = os.path.join(lib_dir, "libnvshmem_device.bc") diff --git a/torch/distributed/_tools/fake_collectives.py b/torch/distributed/_tools/fake_collectives.py index 3b201b395334b..b89970ab33480 100644 --- a/torch/distributed/_tools/fake_collectives.py +++ b/torch/distributed/_tools/fake_collectives.py @@ -2,7 +2,9 @@ from typing import Any import torch -from torch._C._distributed_c10d import ( + +# Import centralized distributed components +from torch.distributed._distributed_c10d import ( _resolve_process_group, FakeWork, ProcessGroup, diff --git a/torch/distributed/algorithms/model_averaging/utils.py b/torch/distributed/algorithms/model_averaging/utils.py index fa8cc184eddc5..3e3243002a9c0 100644 --- a/torch/distributed/algorithms/model_averaging/utils.py +++ b/torch/distributed/algorithms/model_averaging/utils.py @@ -5,10 +5,6 @@ import torch import torch.distributed as dist - -# The two imports below are not always available depending on the -# USE_DISTRIBUTED compile flag. Make sure they raise import error -# if we're trying to use them. 
from torch.distributed import group, ProcessGroup diff --git a/torch/distributed/constants.py b/torch/distributed/constants.py index c1e604bc86753..bfa8785218645 100644 --- a/torch/distributed/constants.py +++ b/torch/distributed/constants.py @@ -1,7 +1,11 @@ from datetime import timedelta from typing import Optional -from torch._C._distributed_c10d import _DEFAULT_PG_TIMEOUT +# Import from centralized fallback module - no ImportError handling needed +from torch.distributed._distributed_c10d import ( + _DEFAULT_PG_NCCL_TIMEOUT, + _DEFAULT_PG_TIMEOUT, +) __all__ = ["default_pg_timeout", "default_pg_nccl_timeout"] @@ -16,11 +20,4 @@ # Later, we could consider merging them back together at the c++ layer if we can align on a same value. # (only if TORCH_NCCL_BLOCKING_WAIT or TORCH_NCCL_ASYNC_ERROR_HANDLING is set to 1). -try: - from torch._C._distributed_c10d import _DEFAULT_PG_NCCL_TIMEOUT - - default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT -except ImportError: - # if C++ NCCL support is not compiled, we don't have access to the default nccl value. - # if anyone is actually trying to use nccl in this state, it should error. - default_pg_nccl_timeout = None +default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py index 3a9363090bf71..6ee9263db8cd4 100644 --- a/torch/distributed/device_mesh.py +++ b/torch/distributed/device_mesh.py @@ -11,35 +11,14 @@ from typing import Optional, TYPE_CHECKING, Union import torch -from torch.distributed import is_available from torch.utils._typing_utils import not_none __all__ = ["init_device_mesh", "DeviceMesh"] -if not is_available(): - import sys - - # We need to create the stubs when distributed is not available. - # Otherwise, we would fail the doc tests (```./.ci/pytorch/docs-test.sh```), - # since it would try to import ``torch.distributed.device_mesh`` or - # ``torch.distributed.init_device_mesh`` but cannot find them. - - class _DeviceMeshStub: - pass - - def _init_device_mesh_stub(): - pass - - sys.modules["torch.distributed.device_mesh"].DeviceMesh = _DeviceMeshStub # type: ignore[attr-defined] - sys.modules[ - "torch.distributed.device_mesh" - ].init_device_mesh = _init_device_mesh_stub # type: ignore[attr-defined] - - -else: - from torch._C._distributed_c10d import Backend as C10dBackend +if True: # just to temporarily avoid reindentation + from torch.distributed._distributed_c10d import Backend as C10dBackend from torch.distributed.distributed_c10d import ( _get_default_group, _resolve_process_group, @@ -534,15 +513,16 @@ def _setup_world_group_and_device(self): # heuristic to set the current cuda/cuda-like device base on num of gpu devices available in each host # NOTE: This device selection would only work for homogeneous hardware. num_devices_per_host = device_handle.device_count() - if ( - world_size > num_devices_per_host - and world_size % num_devices_per_host != 0 - ): - raise RuntimeError( - f"DeviceMesh only support homogeneous hardware, but found " - f"{world_size} ranks and {num_devices_per_host} {self.device_type} devices!" - ) - device_handle.set_device(get_rank() % num_devices_per_host) + if num_devices_per_host: + if ( + world_size > num_devices_per_host + and world_size % num_devices_per_host != 0 + ): + raise RuntimeError( + f"DeviceMesh only support homogeneous hardware, but found " + f"{world_size} ranks and {num_devices_per_host} {self.device_type} devices!" 
+ ) + device_handle.set_device(get_rank() % num_devices_per_host) return _get_default_group() diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 29609404df09b..c81d9c60eb1fe 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -19,13 +19,21 @@ from typing_extensions import deprecated import torch +import torch.distributed._distributed_c10d as _c10d from torch._C import _DistStoreError as DistStoreError -from torch._C._distributed_c10d import ( +from torch._utils_internal import set_pytorch_distributed_envs_from_justknobs +from torch.distributed._distributed_c10d import ( # Process group implementations; Availability flags _DistributedBackendOptions, + _GLOO_AVAILABLE, + _MPI_AVAILABLE, + _NCCL_AVAILABLE, + _ProcessGroupWrapper, _register_process_group, _resolve_process_group, + _UCC_AVAILABLE, _unregister_all_process_groups, _unregister_process_group, + _XCCL_AVAILABLE, AllgatherOptions, AllreduceCoalescedOptions, AllreduceOptions, @@ -37,6 +45,11 @@ get_debug_level, PrefixStore, ProcessGroup, + ProcessGroupGloo, + ProcessGroupMPI, + ProcessGroupNCCL, + ProcessGroupUCC, + ProcessGroupXCCL, ReduceOp, ReduceOptions, ReduceScatterOptions, @@ -44,7 +57,6 @@ Store, Work, ) -from torch._utils_internal import set_pytorch_distributed_envs_from_justknobs from torch.monitor import _WaitCounter from torch.overrides import handle_torch_function, has_torch_function from torch.utils._typing_utils import not_none @@ -131,17 +143,11 @@ "split_group", ] -_MPI_AVAILABLE = True -_NCCL_AVAILABLE = True -_GLOO_AVAILABLE = True -_UCC_AVAILABLE = True -_XCCL_AVAILABLE = True - _pickler = pickle.Pickler _unpickler = pickle.Unpickler -# Change __module__ of all imported types from torch._C._distributed_c10d that are public +# Change __module__ of all imported types from the distributed wrapper that are public def _export_c_types() -> None: _public_types_to_change_module = [ AllreduceCoalescedOptions, @@ -167,45 +173,26 @@ def _export_c_types() -> None: _export_c_types() -try: - from torch._C._distributed_c10d import ProcessGroupMPI - +# Add process groups to __all__ and set their module based on availability +if _MPI_AVAILABLE: ProcessGroupMPI.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupMPI"] -except ImportError: - _MPI_AVAILABLE = False - -try: - from torch._C._distributed_c10d import ProcessGroupNCCL +if _NCCL_AVAILABLE: ProcessGroupNCCL.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupNCCL"] -except ImportError: - _NCCL_AVAILABLE = False - -try: - from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo +if _GLOO_AVAILABLE: ProcessGroupGloo.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupGloo"] -except ImportError: - _GLOO_AVAILABLE = False - -try: - from torch._C._distributed_c10d import ProcessGroupUCC +if _UCC_AVAILABLE: ProcessGroupUCC.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupUCC"] -except ImportError: - _UCC_AVAILABLE = False - -try: - from torch._C._distributed_c10d import ProcessGroupXCCL +if _XCCL_AVAILABLE: ProcessGroupXCCL.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupXCCL"] -except ImportError: - _XCCL_AVAILABLE = False logger = logging.getLogger(__name__) @@ -1325,7 +1312,8 @@ def _get_default_store() -> Store: def _update_default_pg(pg) -> None: _world.default_pg = pg rank = pg.rank() if pg is not None and pg != GroupMember.NON_GROUP_MEMBER 
else -1 - torch._C._distributed_c10d._set_global_rank(rank) + + _c10d._set_global_rank(rank) def get_backend_config(group: Optional[ProcessGroup] = None) -> str: @@ -1962,7 +1950,7 @@ def _new_process_group_helper( if device_id: pg.bound_device_id = device_id - backend_class: torch._C._distributed_c10d.Backend + backend_class: _c10d.Backend for device, backend_str in backend_config.get_device_backend_map().items(): # Use the group name as prefix in the default store, such that # a single store can be reused by multiple groups. @@ -3077,7 +3065,9 @@ def _object_to_tensor(obj, device, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) if backend == Backend.NCCL: - hash = torch._C._distributed_c10d._hash_tensors([byte_tensor]) + from torch.distributed._distributed_c10d import _hash_tensors + + hash = _hash_tensors([byte_tensor]) logger.warning( "_object_to_tensor size: %s hash value: %s", byte_tensor.numel(), @@ -3092,7 +3082,9 @@ def _tensor_to_object(tensor, tensor_size, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) if backend == Backend.NCCL: - hash = torch._C._distributed_c10d._hash_tensors([tensor]) + from torch.distributed._distributed_c10d import _hash_tensors + + hash = _hash_tensors([tensor]) logger.warning( "_tensor_to_object size: %s hash value: %s", tensor.numel(), hash ) @@ -4969,7 +4961,7 @@ def monitored_barrier( def _create_process_group_wrapper( - wrapped_pg: torch._C._distributed_c10d.Backend, + wrapped_pg: _c10d.Backend, store_prefix: str, store: Store, rank: int, diff --git a/torch/distributed/elastic/control_plane.py b/torch/distributed/elastic/control_plane.py index 817255edd23dc..63334a0ca3f62 100644 --- a/torch/distributed/elastic/control_plane.py +++ b/torch/distributed/elastic/control_plane.py @@ -14,7 +14,7 @@ @contextmanager def _worker_server(socket_path: str) -> Generator[None, None, None]: - from torch._C._distributed_c10d import _WorkerServer + from torch.distributed._distributed_c10d import _WorkerServer server = _WorkerServer(socket_path) try: diff --git a/torch/distributed/nn/functional.py b/torch/distributed/nn/functional.py index eeff877260bcc..2bdf3fe2bdffd 100644 --- a/torch/distributed/nn/functional.py +++ b/torch/distributed/nn/functional.py @@ -2,10 +2,6 @@ import torch import torch.distributed as dist from torch.autograd import Function - -# The two imports below are not always available depending on the -# USE_DISTRIBUTED compile flag. Make sure they raise import error -# if we're trying to use them. 
from torch.distributed import group, ReduceOp diff --git a/torch/distributed/rpc/__init__.py b/torch/distributed/rpc/__init__.py index adf901d6b6e3e..27a945a92e44c 100644 --- a/torch/distributed/rpc/__init__.py +++ b/torch/distributed/rpc/__init__.py @@ -37,7 +37,6 @@ def is_available() -> bool: import numbers import torch.distributed.autograd as dist_autograd - from torch._C._distributed_c10d import Store from torch._C._distributed_rpc import ( # noqa: F401 _cleanup_python_rpc_handler, _DEFAULT_INIT_METHOD, @@ -70,6 +69,7 @@ def is_available() -> bool: RpcBackendOptions, WorkerInfo, ) + from torch.distributed._distributed_c10d import Store if _is_tensorpipe_available: from torch._C._distributed_rpc import ( # noqa: F401 diff --git a/torch/distributed/tensor/_collective_utils.py b/torch/distributed/tensor/_collective_utils.py index 4fce6fea538a6..f01836c59592b 100644 --- a/torch/distributed/tensor/_collective_utils.py +++ b/torch/distributed/tensor/_collective_utils.py @@ -8,8 +8,10 @@ import torch import torch.distributed._functional_collectives as funcol import torch.distributed.tensor._dtensor_spec as dtensor_spec -from torch._C._distributed_c10d import _resolve_process_group from torch._logging import warning_once + +# Import from centralized fallback module - no conditional imports needed +from torch.distributed._distributed_c10d import _resolve_process_group from torch.distributed.device_mesh import _mesh_resources, DeviceMesh from torch.distributed.distributed_c10d import ( _get_group_size_by_name, diff --git a/torch/testing/_internal/distributed/fake_pg.py b/torch/testing/_internal/distributed/fake_pg.py index 0a2814c246459..035a8bb7c586d 100644 --- a/torch/testing/_internal/distributed/fake_pg.py +++ b/torch/testing/_internal/distributed/fake_pg.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import torch.distributed as dist -from torch._C._distributed_c10d import FakeProcessGroup +from torch.distributed._distributed_c10d import FakeProcessGroup class FakeStore(dist.Store): From 2335f904148e5b8d94f1d49af0bdb72530dce882 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Fri, 12 Sep 2025 04:00:53 +0000 Subject: [PATCH 148/693] [ONNX] Support enable_gqa when dropout is non-zero (#162771) Fixes #162258 Related to https://github.com/microsoft/onnxscript/pull/2558 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162771 Approved by: https://github.com/justinchuby --- test/onnx/exporter/test_small_models_e2e.py | 30 +++++++++ .../_internal/exporter/_torchlib/ops/nn.py | 64 +++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/test/onnx/exporter/test_small_models_e2e.py b/test/onnx/exporter/test_small_models_e2e.py index c5dd4132f5763..54c7d74489a36 100644 --- a/test/onnx/exporter/test_small_models_e2e.py +++ b/test/onnx/exporter/test_small_models_e2e.py @@ -806,6 +806,36 @@ def forward(self, x): # Test with reference evaluator because ORT does not support the op as of version 1.22 onnx_testing.assert_onnx_program(onnx_program, backend="reference") + def test_enable_gqa_in_attention_23_with_dropout(self): + class Model(torch.nn.Module): + def forward(self, q, k, v): + return torch.nn.functional.scaled_dot_product_attention( # pylint: disable=not-callable + q, k, v, enable_gqa=True, dropout_p=0.1 + ) + + model = Model() + + query = torch.randn(2, 4, 8, 16) + key = torch.randn(2, 2, 8, 16) + value = torch.randn(2, 2, 8, 16) + + onnx_program = self.export( + model, + ( + query, + key, + value, + ), + opset_version=23, + ) + # opset23 only uses manually gqa path when 
dropout is enabled, + # and dropout makes the output non-deterministic, + # so we check for the presence of the ops used in that path. + all_ops = [node.op_type for node in onnx_program.model.graph] + self.assertIn("Unsqueeze", all_ops) + self.assertIn("Expand", all_ops) + self.assertIn("Reshape", all_ops) + if __name__ == "__main__": common_utils.run_tests() diff --git a/torch/onnx/_internal/exporter/_torchlib/ops/nn.py b/torch/onnx/_internal/exporter/_torchlib/ops/nn.py index 90815bc18d6e3..1ea9a4161f431 100644 --- a/torch/onnx/_internal/exporter/_torchlib/ops/nn.py +++ b/torch/onnx/_internal/exporter/_torchlib/ops/nn.py @@ -170,6 +170,9 @@ def aten_scaled_dot_product_attention_23( if is_causal: attn_mask = _causal_attention_mask(query, key, op23) + if enable_gqa: + key, value = _attention_repeat_kv_for_group_query(query, key, value, op23) + if attn_mask is None: return _aten_scaled_dot_product_attention_no_mask_onnx( query, key, value, scale, dropout_p, op23 @@ -180,6 +183,67 @@ def aten_scaled_dot_product_attention_23( ) +def _attention_repeat_kv_for_group_query( + query: TFloat, key: TFloat, value: TFloat, op: Opset +) -> tuple[TFloat, TFloat]: + """Expand key and value for group query attention. + + repeat_interleave is applied on key and value to match the number of heads in query. + + Args: + query: Tensor of shape [B, q_num_heads, q_S, E] + key: Tensor of shape [B, k_num_heads, kv_S, E] + value: Tensor of shape [B, v_num_heads, kv_S, E] + + Returns: + Tuple of (expanded_key, expanded_value) where: + - expanded_key: Tensor of shape [B, q_num_heads, kv_S, E] + - expanded_value: Tensor of shape [B, q_num_heads, kv_S, E + """ + + assert ( + query.shape[1] > key.shape[1] == value.shape[1] + and query.shape[1] % key.shape[1] == 0 + ), ( + "SDPA (GQA or MQA) requires q_num_heads > kv_num_heads & q_num_heads % kv_num_heads == 0" + ) + + # NOTE: QKV are expected to be 4D tensors + + batch_size = op.Shape(query, start=0, end=1) # [B] + q_num_heads = op.Shape(query, start=1, end=2) # [Hq] + kv_num_heads = op.Shape(key, start=1, end=2) # [Hk] + qk_head_size = op.Shape(key, start=3, end=4) # [Dk] + v_head_size = op.Shape(value, start=3, end=4) # [Dv] + new_kv_seq_len = op.Shape(key, start=2, end=3) # [T] + + interleave_dim = op.Div(q_num_heads, kv_num_heads) # Hq / Hk + two = op.Constant(value_int=2) + k_unsqueezed = op.Unsqueeze(key, two) # [B, Hk, 1, T, Dk] + v_unsqueezed = op.Unsqueeze(value, two) # [B, Hv, 1, T, Dv] + + k_expand_shape = op.Concat( + batch_size, kv_num_heads, interleave_dim, new_kv_seq_len, qk_head_size, axis=0 + ) + k_expand = op.Expand(k_unsqueezed, k_expand_shape) + v_expand_shape = op.Concat( + batch_size, kv_num_heads, interleave_dim, new_kv_seq_len, v_head_size, axis=0 + ) + v_expand = op.Expand(v_unsqueezed, v_expand_shape) + + k_attention_shape = op.Concat( + batch_size, q_num_heads, new_kv_seq_len, qk_head_size, axis=0 + ) + v_attention_shape = op.Concat( + batch_size, q_num_heads, new_kv_seq_len, v_head_size, axis=0 + ) + + expanded_key = op.Reshape(k_expand, k_attention_shape) + expanded_value = op.Reshape(v_expand, v_attention_shape) + + return expanded_key, expanded_value + + def _attention_scale(query: TFloat, op: Opset) -> TFloat: """Calculate the scale factor for the attention result. 
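    For intuition, the unsqueeze/expand/reshape sequence in `_attention_repeat_kv_for_group_query` is the graph-level equivalent of repeating each KV head along the head dimension. A minimal eager-mode sketch of the same idea (shapes and variable names here are illustrative only, not part of the patch):

    ```python
    import torch

    # Illustrative GQA shapes: 4 query heads share 2 KV heads (group size 2).
    B, Hq, Hk, T, D = 1, 4, 2, 3, 8
    key = torch.randn(B, Hk, T, D)

    g = Hq // Hk  # queries per KV head
    # unsqueeze -> expand -> reshape, mirroring the ONNX decomposition above
    expanded = key.unsqueeze(2).expand(B, Hk, g, T, D).reshape(B, Hq, T, D)

    # Same result as repeating each KV head g times along the head dimension
    assert torch.equal(expanded, key.repeat_interleave(g, dim=1))
    ```
    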
From 62f044e260f8618fbd787aba283d45fd67021ca3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 21:03:27 -0700 Subject: [PATCH 149/693] Bump setuptools from 72.1.0 to 78.1.1 in /.github/requirements (#162701) Bumps [setuptools](https://github.com/pypa/setuptools) from 72.1.0 to 78.1.1. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v72.1.0...v78.1.1) --- updated-dependencies: - dependency-name: setuptools dependency-version: 78.1.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/requirements/pip-requirements-macOS.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/requirements/pip-requirements-macOS.txt b/.github/requirements/pip-requirements-macOS.txt index 3a27cac46f71f..b4ce89d920cb4 100644 --- a/.github/requirements/pip-requirements-macOS.txt +++ b/.github/requirements/pip-requirements-macOS.txt @@ -26,7 +26,7 @@ pytest-xdist==3.3.1 pytest==7.3.2 pyyaml==6.0.2 scipy==1.12.0 -setuptools==72.1.0 +setuptools==78.1.1 sympy==1.13.3 tlparse==0.4.0 tensorboard==2.13.0 From d959eb02cbcc5186aff1b8cc696a6af638bed2b7 Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Fri, 12 Sep 2025 04:18:51 +0000 Subject: [PATCH 150/693] [audio hash update] update the pinned audio hash (#162752) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned audio hash. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162752 Approved by: https://github.com/pytorchbot --- .github/ci_commit_pins/audio.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ci_commit_pins/audio.txt b/.github/ci_commit_pins/audio.txt index 55fc09b9c034f..f18d293fe6285 100644 --- a/.github/ci_commit_pins/audio.txt +++ b/.github/ci_commit_pins/audio.txt @@ -1 +1 @@ -fa5142928ee157aa65137c4ecff2fe9b1a9e0648 +caba63f0fa29ef9e3d566699f32f11c07c8bda4e From da954f10d6c631d4b85128b4a8b3e5ed8250dc3d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 04:22:59 +0000 Subject: [PATCH 151/693] Bump protobuf from 5.29.4 to 5.29.5 in /.github/requirements (#160844) Bumps [protobuf](https://github.com/protocolbuffers/protobuf) from 5.29.4 to 5.29.5.
    Commits:
    - f5de0a0 Updating version.json and repo version numbers to: 29.5
    - 8563766 Merge pull request #21858 from shaod2/py-cp-29
    - 05ba1a8 Add recursion depth limits to pure python
    - 1ef3f01 Internal pure python fixes
    - 69cca9b Remove fast-path check for non-clang compilers in MessageCreator. (#21612)
    - 21fdb7a fix: contains check segfaults on empty map (#20446) (#20904)
    - 03c50e3 Re-enable aarch64 tests. (#20853)
    - 128f0aa Add volatile to featuresResolved (#20767)
    - bdd49bb Merge pull request #20755 from protocolbuffers/29.x-202503192110
    - c659468 Updating version.json and repo version numbers to: 29.5-dev
    - See full diff in compare view
    

    
    
Pull Request resolved: https://github.com/pytorch/pytorch/pull/160844 Approved by: https://github.com/msaroufim Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/requirements/pip-requirements-macOS.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/requirements/pip-requirements-macOS.txt b/.github/requirements/pip-requirements-macOS.txt index b4ce89d920cb4..5fc26302a0add 100644 --- a/.github/requirements/pip-requirements-macOS.txt +++ b/.github/requirements/pip-requirements-macOS.txt @@ -15,7 +15,7 @@ optree==0.13.0 packaging==23.1 parameterized==0.8.1 pillow==10.3.0 -protobuf==5.29.4 +protobuf==5.29.5 psutil==5.9.8 pygments==2.15.0 pytest-cpp==2.3.0 From 95191522e0a3465a2d2de01bf71bcb39aa6fcba2 Mon Sep 17 00:00:00 2001 From: can-gaa-hou Date: Fri, 12 Sep 2025 04:24:08 +0000 Subject: [PATCH 152/693] [OpenReg] Implement device autoload mechanism (#158555) # Implement OpenReg device autoload mechanism ## Overview The **Autoload** mechanism in PyTorch simplifies the integration of third-party device backends by enabling automatic discovery and initialization at runtime. Traditionally, integrating a new backend required explicit imports or manual initialization, which could be cumbersome and error-prone. With Autoload, PyTorch dynamically detects and initializes device backends, providing a seamless user experience. This mechanism leverages Python entry points (e.g., `torch.backends`) and dynamic module loading. When PyTorch starts, it scans for registered entry points and invokes their initialization hooks, ensuring that all available backends are ready for use without requiring explicit imports. ## Motivation This PR aims to apply [device autoload mechanism](https://github.com/pytorch/pytorch/issues/122468) to the OpenReg module with some simple changes. ## Change ### Before ```python import torch import torch_openreg x = torch.tensor([1, 2, 3], device="openreg") print(x) ``` ### After ```python import torch # No need to import torch_openreg manually! x = torch.tensor([1, 2, 3], device="openreg") print(x) ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/158555 Approved by: https://github.com/FFFrog, https://github.com/albanD Co-authored-by: Jiawei Li --- docs/source/accelerator/autoload.md | 86 +++++++++++++++++++ docs/source/accelerator/index.md | 1 + .../torch_openreg/README.md | 11 ++- .../torch_openreg/setup.py | 13 +++ .../torch_openreg/torch_openreg/__init__.py | 10 ++- test/test_openreg.py | 1 - test/test_transformers_privateuse1.py | 2 - 7 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 docs/source/accelerator/autoload.md diff --git a/docs/source/accelerator/autoload.md b/docs/source/accelerator/autoload.md new file mode 100644 index 0000000000000..97664adcd735a --- /dev/null +++ b/docs/source/accelerator/autoload.md @@ -0,0 +1,86 @@ +# Autoload Mechanism + +The **Autoload** mechanism in PyTorch simplifies the integration of a custom backend by enabling automatic discovery and initialization at runtime. This eliminates the need for explicit imports or manual initialization, allowing developers to seamlessly integrate a new accelerator or backend into PyTorch. + +## Background + +The **Autoload Device Extension** proposal in PyTorch is centered on improving support for various hardware backend devices, especially those implemented as out-of-the-tree extensions (not part of PyTorch’s main codebase). 
Currently, users must manually import or load these device-specific extensions to use them, which complicates the experience and increases cognitive overhead. + +In contrast, in-tree devices (devices officially supported within PyTorch) are seamlessly integrated—users don’t need extra imports or steps. The goal of autoloading is to make out-of-the-tree devices just as easy to use, so users can follow the standard PyTorch device programming model without explicit loading or code changes. This would allow existing PyTorch applications to run on new devices without any modification, making hardware support more user-friendly and reducing barriers to adoption. + +For more information about the background of **Autoload**, please refer to its [RFC](https://github.com/pytorch/pytorch/issues/122468). + +## Design + +The core idea of **Autoload** is to Use Python’s plugin discovery (entry points) so PyTorch automatically loads out-of-tree device extensions when torch is imported—no explicit user import needed. + +For more instructions of the design of **Autoload**, please refer to [**How it works**](https://docs.pytorch.org/tutorials/unstable/python_extension_autoload.html#how-it-works). + +## Implementation + +This tutorial will take **OpenReg** as a new out-of-the-tree device and guide you through the steps to enable and use the **Autoload** mechanism. + +### Entry Point Setup + +To enable **Autoload**, register the `_autoload` function as an entry point in `setup.py` file. + +::::{tab-set} + +:::{tab-item} Python + +```{eval-rst} +.. literalinclude:: ../../../test/cpp_extensions/open_registration_extension/torch_openreg/setup.py + :language: python + :start-after: LITERALINCLUDE START: SETUP + :end-before: LITERALINCLUDE END: SETUP + :linenos: + :emphasize-lines: 9-13 +``` + +::: + +:::: + +### Backend Setup + +Define the initialization hook `_autoload` for backend initialization. This hook will be automatically invoked by PyTorch during startup. + +::::{tab-set-code} +```{eval-rst} +.. literalinclude:: ../../../test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py + :language: python + :start-after: LITERALINCLUDE START: AUTOLOAD + :end-before: LITERALINCLUDE END: AUTOLOAD + :linenos: + :emphasize-lines: 10-12 +``` + + +:::: + +## Result + +After setting up the entry point and backend, build and install your backend. Now, we can use the new accelerator without explicitly importing it. + +```{eval-rst} +.. grid:: 2 + + .. grid-item-card:: :octicon:`terminal;1em;` Without Autoload + :class-card: card-prerequisites + + :: + + >>> import torch + >>> import torch_openreg + >>> torch.tensor(1, device="openreg") + tensor(1, device='openreg:0') + + .. grid-item-card:: :octicon:`terminal;1em;` With Autoload + :class-card: card-prerequisites + + :: + + >>> import torch # Automatically import torch_openreg + >>> torch.tensor(1, device="openreg") + tensor(1, device='openreg:0') +``` diff --git a/docs/source/accelerator/index.md b/docs/source/accelerator/index.md index 68db62e075975..70f25812bb9eb 100644 --- a/docs/source/accelerator/index.md +++ b/docs/source/accelerator/index.md @@ -42,6 +42,7 @@ Next, we will delve into each chapter of this guide. 
Each chapter focuses on a k :glob: :maxdepth: 1 +autoload operators ``` diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/README.md b/test/cpp_extensions/open_registration_extension/torch_openreg/README.md index 83ec85b1055c2..9474c85a1b840 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/README.md +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/README.md @@ -124,6 +124,16 @@ There are 4 DSOs in torch_openreg, and the dependencies between them are as foll - Per-operator Fallback: See `sub.Tensor` - Global Fallback: See `wrapper_cpu_fallback` +### Autoload + +- Autoload Machanism + + When `import torch`, installed accelerators (such as `torch_openreg`) will be automatically loaded, achieving the same experience as the built-in backends. + + - Registering the backend with Python `entry points`: See `setup` in `setup.py` + - Adding a callable function for backend initialization: See `_autoload` in `torch_openreg/__init__.py` + - Dynamically loading the backend without explicit imports: See [Usage Example](#usage-example) + ## Installation and Usage ### Installation @@ -139,7 +149,6 @@ After installation, you can use the `openreg` device in Python just like any oth ```python import torch -import torch_openreg if not torch.openreg.is_available(): print("OpenReg backend is not available in this build.") diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/setup.py b/test/cpp_extensions/open_registration_extension/torch_openreg/setup.py index 0768653e1ac45..8c1496387570d 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/setup.py +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/setup.py @@ -28,6 +28,12 @@ def make_relative_rpath_args(path): def get_pytorch_dir(): + # Disable autoload of the accelerator + + # We must do this for two reasons: + # We only need to get the PyTorch installation directory, so whether the accelerator is loaded or not is irrelevant + # If the accelerator has been previously built and not uninstalled, importing torch will cause a circular import error + os.environ["TORCH_DEVICE_BACKEND_AUTOLOAD"] = "0" import torch return os.path.dirname(os.path.realpath(torch.__file__)) @@ -127,6 +133,7 @@ def main(): ] } + # LITERALINCLUDE START: SETUP setup( packages=find_packages(), package_data=package_data, @@ -135,7 +142,13 @@ def main(): "clean": BuildClean, # type: ignore[misc] }, include_package_data=False, + entry_points={ + "torch.backends": [ + "torch_openreg = torch_openreg:_autoload", + ], + }, ) + # LITERALINCLUDE END: SETUP if __name__ == "__main__": diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py b/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py index 45b2343070fe1..18cee1615705d 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py @@ -9,7 +9,7 @@ _load_dll_libraries() del _load_dll_libraries - +# LITERALINCLUDE START: AUTOLOAD import torch_openreg._C # type: ignore[misc] import torch_openreg.openreg @@ -17,3 +17,11 @@ torch.utils.rename_privateuse1_backend("openreg") torch._register_device_module("openreg", torch_openreg.openreg) torch.utils.generate_methods_for_privateuse1_backend(for_storage=True) + + +def _autoload(): + # It is a placeholder function here to be registered as an entry point. 
+ pass + + +# LITERALINCLUDE END: AUTOLOAD diff --git a/test/test_openreg.py b/test/test_openreg.py index 7ee8ccefcd093..c0d99f5a6ac1a 100644 --- a/test/test_openreg.py +++ b/test/test_openreg.py @@ -10,7 +10,6 @@ import numpy as np import psutil -import torch_openreg # noqa: F401 import torch from torch.serialization import safe_globals diff --git a/test/test_transformers_privateuse1.py b/test/test_transformers_privateuse1.py index 0aa15260d0949..31023875f886d 100644 --- a/test/test_transformers_privateuse1.py +++ b/test/test_transformers_privateuse1.py @@ -4,8 +4,6 @@ from collections import namedtuple from functools import partial -import torch_openreg # noqa: F401 - import torch from torch.nn.attention import SDPBackend from torch.testing._internal.common_nn import NNTestCase From 5dd14f0b656e747d8e1c7462f9aebad7e52fbd5b Mon Sep 17 00:00:00 2001 From: fduwjj Date: Tue, 9 Sep 2025 15:29:34 -0700 Subject: [PATCH 153/693] [CuTe] Copy code from pycute for device mesh bookkeeping (#162413) We copied the whole module and its unit test into pytorch codebase. (https://github.com/NVIDIA/cutlass/blob/main/python%2Fpycute%2Flayout.py). We did change the indentation of code from 2 spaces to 4 spaces. And add lint suppressor to make mypy happy. Also we need to make changes to unit test to include ownership and use `run_tests, TestCase` so that the test gets picked up by CI. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162413 Approved by: https://github.com/ezyang, https://github.com/Skylion007 --- test/distributed/_pycute/test_coalesce.py | 99 +++++ test/distributed/_pycute/test_complement.py | 96 ++++ test/distributed/_pycute/test_composition.py | 218 +++++++++ test/distributed/_pycute/test_int_tuple.py | 86 ++++ test/distributed/_pycute/test_left_inverse.py | 91 ++++ .../distributed/_pycute/test_right_inverse.py | 100 +++++ test/distributed/_pycute/test_typing.py | 65 +++ torch/distributed/_pycute/__init__.py | 37 ++ torch/distributed/_pycute/int_tuple.py | 233 ++++++++++ torch/distributed/_pycute/layout.py | 413 ++++++++++++++++++ torch/distributed/_pycute/typing.py | 45 ++ 11 files changed, 1483 insertions(+) create mode 100644 test/distributed/_pycute/test_coalesce.py create mode 100644 test/distributed/_pycute/test_complement.py create mode 100644 test/distributed/_pycute/test_composition.py create mode 100644 test/distributed/_pycute/test_int_tuple.py create mode 100644 test/distributed/_pycute/test_left_inverse.py create mode 100644 test/distributed/_pycute/test_right_inverse.py create mode 100644 test/distributed/_pycute/test_typing.py create mode 100644 torch/distributed/_pycute/__init__.py create mode 100644 torch/distributed/_pycute/int_tuple.py create mode 100644 torch/distributed/_pycute/layout.py create mode 100644 torch/distributed/_pycute/typing.py diff --git a/test/distributed/_pycute/test_coalesce.py b/test/distributed/_pycute/test_coalesce.py new file mode 100644 index 0000000000000..3bcf1407d8b5c --- /dev/null +++ b/test/distributed/_pycute/test_coalesce.py @@ -0,0 +1,99 @@ +# ruff: noqa: PGH004, G004, F403 +# flake8: noqa +# Owner(s): ["oncall: distributed"] +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +################################################################################################# + +""" +Unit tests for _pycute.coalesce +""" + +import logging + +from torch.distributed._pycute import * +from torch.testing._internal.common_utils import run_tests, TestCase + + +_LOGGER = logging.getLogger(__name__) + + +class TestCoalesce(TestCase): + def helper_test_coalesce(self, layout): + layoutR = coalesce(layout) + + _LOGGER.debug(f"{layout} => {layoutR}") + + self.assertEqual(size(layoutR), size(layout)) + + for i in range(size(layout)): + self.assertEqual(layoutR(i), layout(i)) + + def test_coalesce(self): + layout = Layout(1, 0) + self.helper_test_coalesce(layout) + + layout = Layout(1, 1) + self.helper_test_coalesce(layout) + + layout = Layout((2, 4)) + self.helper_test_coalesce(layout) + + layout = Layout((2, 4, 6)) + self.helper_test_coalesce(layout) + + layout = Layout((2, 4, 6), (1, 6, 2)) + self.helper_test_coalesce(layout) + + layout = Layout((2, 1, 6), (1, 7, 2)) + self.helper_test_coalesce(layout) + + layout = Layout((2, 1, 6), (4, 7, 8)) + self.helper_test_coalesce(layout) + + layout = Layout((2, (4, 6))) + self.helper_test_coalesce(layout) + + layout = Layout((2, 4), (4, 1)) + self.helper_test_coalesce(layout) + + layout = Layout((2, 4, 6), (24, 6, 1)) + self.helper_test_coalesce(layout) + + layout = Layout((2, 1, 3), (2, 4, 4)) + self.helper_test_coalesce(layout) + + layout = Layout(((2, 2), (2, 2)), ((1, 4), (8, 32))) + self.helper_test_coalesce(layout) + + +if __name__ == "__main__": + run_tests() diff --git a/test/distributed/_pycute/test_complement.py b/test/distributed/_pycute/test_complement.py new file mode 100644 index 0000000000000..fd6413bcd112e --- /dev/null +++ b/test/distributed/_pycute/test_complement.py @@ -0,0 +1,96 @@ +# ruff: noqa: PGH004, G004, F403 +# flake8: noqa +# Owner(s): ["oncall: distributed"] +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +################################################################################################# + +""" +Unit tests for _pycute.complement +""" + +import logging + +from torch.distributed._pycute import * +from torch.testing._internal.common_utils import run_tests, TestCase + + +_LOGGER = logging.getLogger(__name__) + + +class TestComplement(TestCase): + def helper_test_complement(self, layout): + layoutR = complement(layout) + + _LOGGER.debug(f"{layout} => {layoutR}") + + # Post-condition: test disjointness of the codomains + for a in range(size(layout)): + for b in range(size(layoutR)): + assert (layout(a) != layoutR(b)) or (layout(a) == 0 and layoutR(b) == 0) + + def test_complement(self): + test = Layout(1, 0) + self.helper_test_complement(test) + + test = Layout(1, 1) + self.helper_test_complement(test) + + test = Layout(4, 0) + self.helper_test_complement(test) + + test = Layout((2, 4), (1, 2)) + self.helper_test_complement(test) + + test = Layout((2, 3), (1, 2)) + self.helper_test_complement(test) + + test = Layout((2, 4), (1, 4)) + self.helper_test_complement(test) + + test = Layout((2, 4, 8), (8, 1, 64)) + self.helper_test_complement(test) + + test = Layout(((2, 2), (2, 2)), ((1, 4), (8, 32))) + self.helper_test_complement(test) + + test = Layout((2, (3, 4)), (3, (1, 6))) + self.helper_test_complement(test) + + test = Layout((4, 6), (1, 6)) + self.helper_test_complement(test) + + test = Layout((4, 10), (1, 10)) + self.helper_test_complement(test) + + +if __name__ == "__main__": + run_tests() diff --git a/test/distributed/_pycute/test_composition.py b/test/distributed/_pycute/test_composition.py new file mode 100644 index 0000000000000..74b1a53c59940 --- /dev/null +++ b/test/distributed/_pycute/test_composition.py @@ -0,0 +1,218 @@ +# ruff: noqa: PGH004, G004, F403 +# flake8: noqa +# Owner(s): ["oncall: distributed"] +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +################################################################################################# + +""" +Unit tests for _pycute.composition +""" + +import logging + +from torch.distributed._pycute import * +from torch.testing._internal.common_utils import run_tests, TestCase + + +_LOGGER = logging.getLogger(__name__) + + +class TestComposition(TestCase): + def helper_test_composition(self, layoutA, layoutB): + layoutR = composition(layoutA, layoutB) + + _LOGGER.debug(f"{layoutA} o {layoutB} => {layoutR}") + + # True post-condition: Every coordinate c of layoutB with L1D(c) < size(layoutR) is a coordinate of layoutR. 
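+        # That stronger property is not checked exhaustively here; the loop
+        # below only verifies the composed layout pointwise on its domain.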
+ + # Test that R(c) = A(B(c)) for all coordinates c in layoutR + for i in range(size(layoutR)): + self.assertEqual(layoutR(i), layoutA(layoutB(i))) + + def test_composition(self): + layoutA = Layout(1, 0) + layoutB = Layout(1, 0) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(1, 0) + layoutB = Layout(1, 1) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(1, 1) + layoutB = Layout(1, 0) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(1, 1) + layoutB = Layout(1, 1) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(4) + layoutB = Layout(4) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4), (2)) + layoutB = Layout(4) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(4) + layoutB = Layout((4), (2)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4), (0)) + layoutB = Layout(4) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(4) + layoutB = Layout((4), (0)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((1), (0)) + layoutB = Layout(4) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(4) + layoutB = Layout((1), (0)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(4) + layoutB = Layout(2) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4), (2)) + layoutB = Layout(2) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(4) + layoutB = Layout((2), (2)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4), (2)) + layoutB = Layout((2), (2)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(12) + layoutB = Layout((4, 3)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((12), (2)) + layoutB = Layout((4, 3)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(12) + layoutB = Layout((4, 3), (3, 1)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((12), (2)) + layoutB = Layout((4, 3), (3, 1)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(12) + layoutB = Layout((2, 3), (2, 4)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 3)) + layoutB = Layout((4, 3)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 3)) + layoutB = Layout(12) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 3)) + layoutB = Layout((6), (2)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 3)) + layoutB = Layout((6, 2), (2, 1)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 3), (3, 1)) + layoutB = Layout((4, 3)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 3), (3, 1)) + layoutB = Layout(12) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 3), (3, 1)) + layoutB = Layout((6), (2)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 3), (3, 1)) + layoutB = Layout((6, 2), (2, 1)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((8, 8)) + layoutB = Layout(((2, 2, 2), (2, 2, 2)), ((1, 16, 4), (8, 2, 32))) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((8, 8), (8, 1)) + layoutB = Layout(((2, 2, 2), (2, 2, 2)), ((1, 16, 4), (8, 2, 32))) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(((2, 2, 2), (2, 2, 2)), ((1, 16, 4), (8, 2, 32))) + layoutB = Layout(8, 4) + 
self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout(((4, 2)), ((1, 16))) + layoutB = Layout((4, 2), (2, 1)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((2, 2), (2, 1)) + layoutB = Layout((2, 2), (2, 1)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 8, 2)) + layoutB = Layout((2, 2, 2), (2, 8, 1)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 8, 2), (2, 8, 1)) + layoutB = Layout((2, 2, 2), (1, 8, 2)) + self.helper_test_composition(layoutA, layoutB) + + layoutA = Layout((4, 8, 2), (2, 8, 1)) + layoutB = Layout((4, 2, 2), (2, 8, 1)) + self.helper_test_composition(layoutA, layoutB) + + # Pre-coalesced LHS + layoutA = Layout((4, 6, 8), (1, 4, 7)) + layoutB = Layout((6), (1)) + self.helper_test_composition(layoutA, layoutB) + + # Mid-layout truncation + layoutA = Layout((4, 6, 8, 10), (2, 3, 5, 7)) + layoutB = Layout(6, 12) + self.helper_test_composition(layoutA, layoutB) + + +if __name__ == "__main__": + run_tests() diff --git a/test/distributed/_pycute/test_int_tuple.py b/test/distributed/_pycute/test_int_tuple.py new file mode 100644 index 0000000000000..936ce52e03c73 --- /dev/null +++ b/test/distributed/_pycute/test_int_tuple.py @@ -0,0 +1,86 @@ +# ruff: noqa: PGH004, G004, F403 +# flake8: noqa +# Owner(s): ["oncall: distributed"] +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +################################################################################################# + +""" +Unit tests for _pycute.int_tuple +""" + +from torch.distributed._pycute import * +from torch.testing._internal.common_utils import run_tests, TestCase + + +class TestIntTuple(TestCase): + def test_product(self): + self.assertEqual(product(2), 2) + + self.assertEqual(product((3, 2)), 6) + + self.assertEqual(product(product(((2, 3), 4))), 24) + + def test_inner_product(self): + self.assertEqual(inner_product(2, 3), 6) + + self.assertEqual(inner_product((1, 2), (3, 2)), 7) + + self.assertEqual(inner_product(((2, 3), 4), ((2, 1), 2)), 15) + + def test_shape_div(self): + self.assertEqual(shape_div((3, 4), 6), (1, 2)) + + self.assertEqual(shape_div((3, 4), 12), (1, 1)) + + self.assertEqual(shape_div((3, 4), 36), (1, 1)) + + self.assertEqual(shape_div(((3, 4), 6), 36), ((1, 1), 2)) + + self.assertEqual(shape_div((6, (3, 4)), 36), (1, (1, 2))) + + def test_prefix_product(self): + self.assertEqual(prefix_product(2), 1) + + self.assertEqual(prefix_product((3, 2)), (1, 3)) + + self.assertEqual(prefix_product((3, 2, 4)), (1, 3, 6)) + + self.assertEqual(prefix_product(((2, 3), 4)), ((1, 2), 6)) + + self.assertEqual( + prefix_product(((2, 3), (2, 1, 2), (5, 2, 1))), + ((1, 2), (6, 12, 12), (24, 120, 240)), + ) + + +if __name__ == "__main__": + run_tests() diff --git a/test/distributed/_pycute/test_left_inverse.py b/test/distributed/_pycute/test_left_inverse.py new file mode 100644 index 0000000000000..a02e3b29938b5 --- /dev/null +++ b/test/distributed/_pycute/test_left_inverse.py @@ -0,0 +1,91 @@ +# ruff: noqa: PGH004, G004, F403 +# flake8: noqa +# Owner(s): ["oncall: distributed"] +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +################################################################################################# + +""" +Unit tests for _pycute.left_inverse +""" + +import logging + +from torch.distributed._pycute import * +from torch.testing._internal.common_utils import run_tests, TestCase + + +_LOGGER = logging.getLogger(__name__) + + +class TestLeftInverse(TestCase): + def helper_test_left_inverse(self, layout): + inv_layout = left_inverse(layout) + + _LOGGER.debug(f"{layout} => {inv_layout}") + + for i in range(size(layout)): + self.assertEqual(inv_layout(layout(i)), i) + + def test_left_inverse(self): + test = Layout(1, 0) + self.helper_test_left_inverse(test) + + test = Layout((1, 1), (0, 0)) + self.helper_test_left_inverse(test) + + test = Layout(1, 1) + self.helper_test_left_inverse(test) + + test = Layout(4, 1) + self.helper_test_left_inverse(test) + + test = Layout(4, 2) + self.helper_test_left_inverse(test) + + test = Layout((8, 4), (1, 8)) + self.helper_test_left_inverse(test) + + test = Layout((8, 4), (4, 1)) + self.helper_test_left_inverse(test) + + test = Layout((2, 4, 6), (1, 2, 8)) + self.helper_test_left_inverse(test) + + test = Layout((2, 4, 6), (4, 1, 8)) + self.helper_test_left_inverse(test) + + test = Layout((4, 2), (1, 16)) + self.helper_test_left_inverse(test) + + +if __name__ == "__main__": + run_tests() diff --git a/test/distributed/_pycute/test_right_inverse.py b/test/distributed/_pycute/test_right_inverse.py new file mode 100644 index 0000000000000..043e86e021a3f --- /dev/null +++ b/test/distributed/_pycute/test_right_inverse.py @@ -0,0 +1,100 @@ +# ruff: noqa: PGH004, G004, F403 +# flake8: noqa +# Owner(s): ["oncall: distributed"] +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +################################################################################################# + +""" +Unit tests for _pycute.left_inverse +""" + +import logging + +from torch.distributed._pycute import * +from torch.testing._internal.common_utils import run_tests, TestCase + + +_LOGGER = logging.getLogger(__name__) + + +class TestRightInverse(TestCase): + def helper_test_right_inverse(self, layout): + inv_layout = right_inverse(layout) + + _LOGGER.debug(f"{layout} => {inv_layout}") + + for i in range(size(inv_layout)): + self.assertEqual(layout(inv_layout(i)), i) + + def test_right_inverse(self): + test = Layout(1, 0) + self.helper_test_right_inverse(test) + + test = Layout((1, 1), (0, 0)) + self.helper_test_right_inverse(test) + + test = Layout((3, 7), (0, 0)) + self.helper_test_right_inverse(test) + + test = Layout(1, 1) + self.helper_test_right_inverse(test) + + test = Layout(4, 0) + self.helper_test_right_inverse(test) + + test = Layout(4, 1) + self.helper_test_right_inverse(test) + + test = Layout(4, 2) + self.helper_test_right_inverse(test) + + test = Layout((2, 4), (0, 2)) + self.helper_test_right_inverse(test) + + test = Layout((8, 4), (1, 8)) + self.helper_test_right_inverse(test) + + test = Layout((8, 4), (4, 1)) + self.helper_test_right_inverse(test) + + test = Layout((2, 4, 6), (1, 2, 8)) + self.helper_test_right_inverse(test) + + test = Layout((2, 4, 6), (4, 1, 8)) + self.helper_test_right_inverse(test) + + test = Layout((4, 2), (1, 16)) + self.helper_test_right_inverse(test) + + +if __name__ == "__main__": + run_tests() diff --git a/test/distributed/_pycute/test_typing.py b/test/distributed/_pycute/test_typing.py new file mode 100644 index 0000000000000..61f50c08a1add --- /dev/null +++ b/test/distributed/_pycute/test_typing.py @@ -0,0 +1,65 @@ +# flake8: noqa +# ruff: noqa: PGH004 +# Owner(s): ["oncall: distributed"] +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +################################################################################################# + +""" +Unit tests for _pycute.typing +""" + +import logging + +from torch.distributed._pycute import * +from torch.testing._internal.common_utils import run_tests, TestCase + + +_LOGGER = logging.getLogger(__name__) + + +class TestTyping(TestCase): + def helper_test_typing(self, _cls, _obj, cls, expected: bool): + _LOGGER.debug(f"issubclass({_cls}, {cls})") + _LOGGER.debug(f"isinstance({_obj}, {cls})") + + self.assertEqual(expected, issubclass(_cls, cls)) + self.assertEqual(expected, isinstance(_obj, cls)) + + def test_typing(self): + self.helper_test_typing(int, 1, Integer, True) + self.helper_test_typing(float, 1.0, Integer, False) + self.helper_test_typing(str, "hi", Integer, False) + self.helper_test_typing(bool, False, Integer, False) + + +if __name__ == "__main__": + run_tests() diff --git a/torch/distributed/_pycute/__init__.py b/torch/distributed/_pycute/__init__.py new file mode 100644 index 0000000000000..ea1255591c55a --- /dev/null +++ b/torch/distributed/_pycute/__init__.py @@ -0,0 +1,37 @@ +# flake8: noqa +# ruff: noqa: PGH004, B011 +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +################################################################################################# + +from .int_tuple import * +from .layout import * +from .typing import * diff --git a/torch/distributed/_pycute/int_tuple.py b/torch/distributed/_pycute/int_tuple.py new file mode 100644 index 0000000000000..b86a5ff603fd1 --- /dev/null +++ b/torch/distributed/_pycute/int_tuple.py @@ -0,0 +1,233 @@ +# mypy: ignore-errors +# flake8: noqa +# ruff: noqa: PGH004, B011 +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +################################################################################################# + +""" +Functions for manipulating IntTuples +""" + +from functools import reduce +from itertools import chain +from typing import Union + +from .typing import Integer + + +def is_int(x): + return isinstance(x, Integer) + + +def is_tuple(x): + return isinstance(x, tuple) + + +def flatten(t): + if is_tuple(t): + if len(t) == 0: + return () + else: + return tuple(i for a in t for i in flatten(a)) + else: + return (t,) + + +def signum(a): + return bool(a > 0) - bool(a < 0) + + +def product(a): + if is_tuple(a): + return reduce(lambda val, elem: val * product(elem), a, 1) + else: + return a + + +def inner_product(a, b): + if is_tuple(a): # tuple tuple + assert len(a) == len(b) + return sum(inner_product(x, y) for x, y in zip(a, b)) + else: # "int" "int" + assert not is_tuple(b) + return a * b + + +def tuple_max(a): + if is_tuple(a): + return max(tuple_max(x) for x in a) + else: + return a + + +def elem_scale(a, b): + if is_tuple(a): + if is_tuple(b): # tuple tuple + assert len(a) == len(b) + return tuple(elem_scale(x, y) for x, y in zip(a, b)) + else: # tuple "int" + assert False # Error + else: + if is_tuple(b): # "int" tuple + return elem_scale(a, product(b)) + else: # "int" "int" + return a * b + + +# Inclusive prefix ceil div with output congruent to input a +def shape_div(a, b): + if is_tuple(a): + if is_tuple(b): # tuple tuple + assert len(a) == len(b) + return tuple(shape_div(x, y) for x, y in zip(a, b)) + else: # tuple "int" + # r = [shape_div(a[0],b)] + [shape_div(a[i],b := shape_div(b, product(a[i-1]))) for i in range(1,len(a))] + r = [] + for v in a: + r.append(shape_div(v, b)) + b = shape_div(b, product(v)) + return tuple(r) + else: + if is_tuple(b): # "int" tuple + return shape_div(a, product(b)) + else: # "int" "int" + assert a % b == 0 or b % a == 0 + return (a + b - 1) // b + + +# Exclusive prefix product with output congruent to input a +def prefix_product(a, init=1): + if is_tuple(a): + if is_tuple(init): # tuple tuple + assert len(a) == len(init) + return tuple(prefix_product(x, i) for x, i in zip(a, init)) + else: # tuple "int" + # r = [prefix_product(a[0],init)] + [prefix_product(a[i],init := init * product(a[i-1])) for i in range(1,len(a))] + r = [] + for v in a: + r.append(prefix_product(v, init)) + init = init * product(v) + return tuple(r) + else: + if is_tuple(init): # "int" tuple + assert False # Error + else: # "int" "int" + return init + + +def idx2crd(idx, shape, stride=None): + if stride is None: + stride = prefix_product(shape) + + if is_tuple(idx): + if is_tuple(shape): # tuple tuple tuple + assert len(idx) == len(shape) and len(idx) == len(stride) + return tuple(idx2crd(i, s, d) for i, s, d in zip(idx, shape, stride)) + else: # tuple "int" "int" + assert False # Error + else: + if is_tuple(shape): # "int" tuple tuple + assert len(shape) == len(stride) + return tuple(idx2crd(idx, s, d) for s, d in zip(shape, stride)) + else: # "int" "int" "int" + return (idx // stride) % shape + + +def crd2idx(crd, shape, stride=None): + if stride is None: + stride = prefix_product(shape) + + if is_tuple(crd): + if is_tuple(shape): # tuple tuple tuple + assert len(crd) == len(shape) and len(crd) == len(stride) + return sum(crd2idx(c, s, d) for c, s, d in zip(crd, shape, stride)) + else: # tuple "int" "int" + assert False, f"crd={crd}, shape={shape}" # Error + else: + if crd is None: + crd = 0 + + if is_tuple(shape): # "int" tuple tuple + assert len(shape) == len(stride) + 
result = 0 + for i in range(len(shape) - 1): + result += crd2idx(crd % product(shape[i]), shape[i], stride[i]) + crd = crd // product(shape[i]) + return result + crd2idx(crd, shape[-1], stride[-1]) + else: # "int" "int" "int" + return crd * stride + + +# Transform crd into the dst_shape's iteration space +def crd2crd(crd, dst_shape, src_shape=None): + if is_tuple(crd): + if is_tuple(dst_shape): # tuple tuple + assert len(crd) == len(dst_shape) + return tuple(crd2crd(x, y) for x, y in zip(crd, dst_shape)) + else: # tuple "int" + # Ambiguous unless we have src_shape + assert src_shape is not None + return crd2idx(crd, src_shape) + else: + if is_tuple(dst_shape): # "int" tuple + return idx2crd(crd, dst_shape) + else: # "int" "int" + assert crd < dst_shape + return crd + + +# Filter trg according to crd: keep only elements of trg that are paired with None +def slice_(crd: Union[None, tuple, int], trg: Union[tuple, int]): + if is_tuple(crd): + if is_tuple(trg): # tuple tuple + assert len(crd) == len(trg) + # match C++ behavior of `filter_tuple` using `tuple_cat(...)` + return tuple( + chain( + *filter(lambda x: x != (), [slice_(c, s) for c, s in zip(crd, trg)]) + ) + ) + else: + assert False # tuple "int" : Error + elif crd is None: + # match C++ behavior `return cute::tuple{b};` + return (trg,) + else: + return () + + +# Determine if None appears at any of an int_tuples' terminals +def has_none(a: Union[None, tuple, int]): + if is_tuple(a): + return any(has_none(v) for v in a) + else: + return a is None diff --git a/torch/distributed/_pycute/layout.py b/torch/distributed/_pycute/layout.py new file mode 100644 index 0000000000000..67846b56f6e91 --- /dev/null +++ b/torch/distributed/_pycute/layout.py @@ -0,0 +1,413 @@ +# mypy: ignore-errors +# flake8: noqa +# ruff: noqa: PGH004, B011 +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +################################################################################################# + +""" +Definition of CuTe Layouts and functions to manipulate them +""" + +from itertools import chain + +from .int_tuple import * + + +class LayoutBase: + pass + + +def is_layout(x): + return isinstance(x, LayoutBase) + + +class Layout(LayoutBase): + def __init__(self, _shape, _stride=None): + self.shape = _shape + if _stride is None: + self.stride = prefix_product(self.shape) + else: + self.stride = _stride + + # operator == + def __eq__(self, other): + return self.shape == other.shape and self.stride == other.stride + + # operator len(L) (len [rank] like tuples) + def __len__(self): + if is_tuple(self.shape): + return len(self.shape) + else: + return 1 + + # operator () (map coord to idx) + def __call__(self, *args): + """ + Map a logical coordinate to a linear index (Coord has no Underscore slice operators) + OR + Slice the layout and return the sublayout (Coord has an Underscore slice op) + + Follow the same behavior of `Layout::operator(Coord const&)` in cute C++ + """ + if has_none(args): + if len(args) == 1: + return Layout(slice_(args[0], self.shape), slice_(args[0], self.stride)) + else: + return Layout(slice_(args, self.shape), slice_(args, self.stride)) + else: + if len(args) == 1: + return crd2idx(args[0], self.shape, self.stride) + else: + return crd2idx(args, self.shape, self.stride) + + # operator [] (get-i like tuples) + def __getitem__(self, i): + if is_tuple(self.shape): + return Layout(self.shape[i], self.stride[i]) + else: + assert i == 0 + return Layout(self.shape, self.stride) + + # size(layout) Size of the domain + def size(self): + return product(self.shape) + + # cosize(layout) Size of the codomain + def cosize(self): + return self(self.size() - 1) + 1 + + # print and str + def __str__(self): + return f"{self.shape}:{self.stride}" + + # error msgs and representation + def __repr__(self): + return f"Layout({self.shape},{self.stride})" + + +# Make Layout from a list of layouts (each layout it's own mode in the result) +def make_layout(*layouts): + if len(layouts) == 1 and not is_layout(layouts[0]): + layouts = layouts[0] + + shape, stride = zip(*((a.shape, a.stride) for a in layouts)) + return Layout(shape, stride) + + +# Size of the domain +def size(layout): + if is_layout(layout): + return layout.size() + return product(layout) + + +# Size of the codomain +def cosize(layout): + return layout.cosize() + + +# Layout coalesce -- flatten and combine as many modes as possible while preserving the int-to-int function +def coalesce(layout, profile=None): + if is_tuple(profile): + assert len(layout) >= len(profile) + return make_layout( + chain( + (coalesce(layout[i], profile[i]) for i in range(0, len(profile))), + (layout[i] for i in range(len(profile), len(layout))), + ) + ) + + result_shape = [1] + result_stride = [0] + for shape, stride in zip(flatten(layout.shape), flatten(layout.stride)): + # skip their shape-1s + if shape == 1: + continue + # replace our shape-1 with anything + elif result_shape[-1] == 1: + result_shape[-1] = shape + result_stride[-1] = stride + # merge modes if the shape*stride match + elif result_shape[-1] * result_stride[-1] == stride: + result_shape[-1] = result_shape[-1] * shape + # append a new mode + else: + result_shape.append(shape) + result_stride.append(stride) + + if len(result_shape) == 1: + return Layout(result_shape[0], result_stride[0]) + else: + return Layout(tuple(result_shape), tuple(result_stride)) + + +# Layout filter -- 
replace all stride-0 modes with size-1 and then coalesce to remove them +def filter(layout, profile=None): + if is_tuple(profile): + assert len(layout) >= len(profile) + return make_layout( + chain( + (filter(layout[i], profile[i]) for i in range(0, len(profile))), + (layout[i] for i in range(len(profile), len(layout))), + ) + ) + + result_shape = [] + result_stride = [] + for shape, stride in zip(flatten(layout.shape), flatten(layout.stride)): + # skip their shape-1s and stride-0s + if not (shape == 1 or stride == 0): + result_shape.append(shape) + result_stride.append(stride) + + if len(result_shape) == 0: + return Layout(1, 0) + else: + return coalesce(Layout(tuple(result_shape), tuple(result_stride))) + + +# Layout composition +# Use tuples-of-layouts to perform this operation by-mode and None as no-op +def composition(layoutA, layoutB): + if layoutB is None: + return layoutA + elif is_int(layoutB): + return composition(layoutA, Layout(layoutB)) + elif is_tuple(layoutB): + assert len(layoutA) >= len(layoutB) + return make_layout( + chain( + (composition(layoutA[i], layoutB[i]) for i in range(0, len(layoutB))), + (layoutA[i] for i in range(len(layoutB), len(layoutA))), + ) + ) + elif is_tuple(layoutB.shape): + return make_layout(composition(layoutA, layoutB_i) for layoutB_i in layoutB) + + if layoutB.stride == 0: + return Layout(layoutB.shape, 0) + else: + result_shape = [] + result_stride = [] + rest_shape = layoutB.shape + rest_stride = layoutB.stride + flat_A = coalesce(layoutA) + for curr_shape, curr_stride in zip( + flatten(flat_A.shape)[:-1], flatten(flat_A.stride)[:-1] + ): + assert curr_shape % rest_stride == 0 or rest_stride % curr_shape == 0 + new_shape = min(max(1, curr_shape // rest_stride), rest_shape) + + if new_shape != 1: + result_shape.append(new_shape) + result_stride.append(rest_stride * curr_stride) + + rest_shape = rest_shape // new_shape + rest_stride = -( + -rest_stride // curr_shape + ) # Python exclusive impl: "//" is always floor div so == ceil_div(abs(rest_stride), curr_shape) * signum(rest_stride) + + if rest_shape != 1 or len(result_shape) == 0: + result_shape.append(rest_shape) + result_stride.append(rest_stride * flatten(flat_A.stride)[-1]) + + if len(result_shape) == 1: + return Layout(result_shape[0], result_stride[0]) + else: + return Layout(tuple(result_shape), tuple(result_stride)) + + +# Layout complement +def complement(layout, max_idx=1): + if is_int(layout): + return complement(Layout(layout)) + + result_shape = [] + result_stride = [] + current_idx = 1 + + sorted_DS = sorted(zip(flatten(layout.stride), flatten(layout.shape))) + for stride, shape in sorted_DS: + if stride == 0 or shape == 1: + continue + + in_bound = current_idx <= shape * stride + # To support symbolic value which can't be evaluated now + assert (type(in_bound) is not bool) or in_bound + + result_shape.append(stride // current_idx) + result_stride.append(current_idx) + current_idx = shape * stride + + result_shape.append((max_idx + current_idx - 1) // current_idx) # ceil_div + result_stride.append(current_idx) + + return coalesce(Layout(tuple(result_shape), tuple(result_stride))) + + +# Layout right inverse +def right_inverse(layout): + if layout is None: + return None + elif is_int(layout): + return Layout(layout) + + result_shape = [] + result_stride = [] + current_idx = 1 + + flat_shape = flatten(layout.shape) + flat_stride = flatten(layout.stride) + sorted_DSA = sorted(zip(flat_stride, flat_shape, prefix_product(flat_shape))) + for stride, shape, rstride in sorted_DSA: + if 
shape == 1: + continue + if current_idx != stride: + break + + result_shape.append(shape) + result_stride.append(rstride) + current_idx = shape * stride + + return coalesce(Layout(tuple(result_shape), tuple(result_stride))) + + +# Layout left inverse +def left_inverse(layout): + if layout is None: + return None + elif is_int(layout): + return Layout(layout) + return right_inverse(make_layout(layout, complement(layout))) + + +# Split a layout by the composition of B and the "rest" +# Use tuples-of-layouts to perform this operation by-mode and None as no-op +def logical_divide(layoutA, layoutB): + if layoutB is None: + return layoutA + elif is_int(layoutB): + return logical_divide(layoutA, Layout(layoutB)) + elif is_tuple(layoutB): + assert len(layoutA) >= len(layoutB) + return make_layout( + chain( + ( + logical_divide(layoutA[i], layoutB[i]) + for i in range(0, len(layoutB)) + ), + (layoutA[i] for i in range(len(layoutB), len(layoutA))), + ) + ) + + return composition( + layoutA, make_layout(layoutB, complement(layoutB, size(layoutA))) + ) + + +# Reproduce a layoutA over a layoutB +# Use tuples-of-layouts to perform this operation by-mode and None as no-op +def logical_product(layoutA, layoutB): + if layoutB is None: + return layoutA + elif is_int(layoutB): + return logical_divide(layoutA, Layout(layoutB)) + elif is_tuple(layoutB): + assert len(layoutA) >= len(layoutB) + return make_layout( + chain( + ( + logical_product(layoutA[i], layoutB[i]) + for i in range(0, len(layoutB)) + ), + (layoutA[i] for i in range(len(layoutB), len(layoutA))), + ) + ) + + return make_layout( + layoutA, + composition(complement(layoutA, size(layoutA) * cosize(layoutB)), layoutB), + ) + + +# Gather the modes from a hierarchical logical_divide or logical_product +def hier_unzip(splitter, layoutA, layoutB): + if layoutB is None: + return make_layout(Layout(1, 0), layoutA) + elif is_tuple(layoutB): + assert len(layoutA) >= len(layoutB) + # A layout with shape ((A,a),(B,b),(C,c)) + split = make_layout( + hier_unzip(splitter, layoutA[i], layoutB[i]) for i in range(0, len(layoutB)) + ) + # Gather to shape ((A,B,C,...),(a,b,c,...,y,z)) + return make_layout( + make_layout(split[i][0] for i in range(0, len(layoutB))), + make_layout( + chain( + (split[i][1] for i in range(0, len(layoutB))), + (layoutA[i] for i in range(len(layoutB), len(layoutA))), + ) + ), + ) + + # splitter must return a rank-2 layout + return splitter(layoutA, layoutB) + + +# Apply logical divide hierarchically and gather the split modes into two modes +def zipped_divide(layoutA, layoutB): + return hier_unzip(logical_divide, layoutA, layoutB) + + +# Perform logical divide hierarchically and gather tiles (B-layouts) into a new mode +def tiled_divide(layoutA, layoutB): + result = zipped_divide(layoutA, layoutB) + return make_layout([result[0]] + [result[1][i] for i in range(len(result[1]))]) + + +# Apply logical product hierarchically and gather the split modes into two modes +def zipped_product(layoutA, layoutB): + return hier_unzip(logical_product, layoutA, layoutB) + + +# Perform logical product hierarchically and gather tiles (B-layouts) into a new mode +def tiled_product(layoutA, layoutB): + result = zipped_product(layoutA, layoutB) + return make_layout([result[0]] + [result[1][i] for i in range(len(result[1]))]) + + +def slice_and_offset(crd: tuple, layout: Layout): + return ( + Layout(slice_(crd, layout.shape), slice_(crd, layout.stride)), + crd2idx(crd, layout.shape, layout.stride), + ) diff --git a/torch/distributed/_pycute/typing.py 
b/torch/distributed/_pycute/typing.py new file mode 100644 index 0000000000000..c45ac197b8ae6 --- /dev/null +++ b/torch/distributed/_pycute/typing.py @@ -0,0 +1,45 @@ +# mypy: ignore-errors +# flake8: noqa +# ruff: noqa: PGH004, B011 +################################################################################################# +# +# Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +################################################################################################# + +from abc import ABC + + +class Integer(ABC): + @classmethod + def __subclasshook__(cls, c): + if c in [bool, float]: + return False + + return issubclass(c, int) From 5dd84559a54509d9ab394a30ba708533a6eddf65 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 11 Sep 2025 18:36:35 +0000 Subject: [PATCH 154/693] [dynamo] Add DUAL_LEVEL_MATCH C++ guard (#162528) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162528 Approved by: https://github.com/anijain2305 --- torch/_C/_dynamo/guards.pyi | 5 ++++ torch/_dynamo/guards.py | 10 ++----- torch/csrc/dynamo/guards.cpp | 57 ++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/torch/_C/_dynamo/guards.pyi b/torch/_C/_dynamo/guards.pyi index 537a28123d0d3..547ca964d09f2 100644 --- a/torch/_C/_dynamo/guards.pyi +++ b/torch/_C/_dynamo/guards.pyi @@ -320,6 +320,11 @@ class GuardManager: item: Any, verbose_code_parts: list[str], ) -> None: ... + def add_dual_level_match_guard( + self, + level: int, + verbose_code_parts: list[str], + ) -> None: ... 
def add_tuple_iterator_length_guard( self, length: int, diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index d8c4271f614b3..f75ef9b12b207 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -2058,14 +2058,8 @@ def DUAL_LEVEL(self, guard: Guard) -> None: dual_level = self.check_fn_manager.output_graph.dual_level code = [f"torch.autograd.forward_ad._current_level == {dual_level}"] self._set_guard_export_info(guard, code) - # TODO(anijain2305) - Consider this moving this guard to C++ - forward_ad = torch.autograd.forward_ad - - def fn() -> bool: - return forward_ad._current_level == dual_level - - self.guard_manager.root.add_lambda_guard_no_args( - fn, get_verbose_code_parts(code, guard) + self.guard_manager.root.add_dual_level_match_guard( + dual_level, get_verbose_code_parts(code, guard) ) def FUNCTORCH_STACK_MATCH(self, guard: Guard) -> None: diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index 6682e91cd08b3..7434749ec1f98 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -2254,6 +2254,51 @@ class SET_CONTAINS : public LeafGuard { py::object _item; }; +// Check if the dual level is the same as the one in fx graph +class DUAL_LEVEL_MATCH : public LeafGuard { + public: + DUAL_LEVEL_MATCH( + RootGuardManager* root_guard_manager, + int64_t level, + py::object verbose_code_parts) + : LeafGuard(root_guard_manager, std::move(verbose_code_parts)), + _level(level) { + forward_ad_module = py::module_::import("torch.autograd.forward_ad"); + } + + bool check_nopybind(PyObject* value) override { // borrowed ref + // Ignore value arg, this is just to satisfy the interface. + return _check(); + } + + bool check_nopybind(FrameLocalsMapping* value) override { + // Ignore value arg, this is just to satisfy the interface. + return _check(); + } + + bool _check() { + PyObject* current_level = PyObject_GetAttrString( + forward_ad_module.ptr(), "_current_level"); // new ref + if (current_level == nullptr) { + // Attribute absent, clear the exception and return false. + PyErr_Clear(); + return false; + } + if (!PyLong_CheckExact(current_level)) { + Py_DECREF(current_level); + return false; + } else { + int64_t current_level_int = PyLong_AsLongLong(current_level); + Py_DECREF(current_level); + return current_level_int == _level; + } + } + + private: + int64_t _level; + py::object forward_ad_module; +}; + /** * Relational guards compare more than one value. We implement Relational * guards by capturing some state in the guard object. 
For example for tensor @@ -6826,6 +6871,10 @@ PyObject* torch_c_dynamo_guards_init() { py_m, "SET_CONTAINS") .def(py::init()) .def("__call__", &SET_CONTAINS::check); + py::class_>( + py_m, "DUAL_LEVEL_MATCH") + .def(py::init()) + .def("__call__", &DUAL_LEVEL_MATCH::check); py::class_>( py_m, "DYNAMIC_INDICES") .def(py::init()) @@ -7259,6 +7308,14 @@ PyObject* torch_c_dynamo_guards_init() { std::move(item), std::move(verbose_code_parts))); }) + .def( + "add_dual_level_match_guard", + [](GuardManager& self, + int64_t level, + py::object verbose_code_parts) -> void { + self.add_leaf_guard(std::make_shared( + self.get_root(), level, std::move(verbose_code_parts))); + }) .def( "add_dynamic_indices_guard", [](GuardManager& self, From 79d2418b5ab4ae12e18b20bfe1655299ea49ef5c Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Thu, 11 Sep 2025 18:36:36 +0000 Subject: [PATCH 155/693] [inductor] Add FLOAT_IS_NAN and COMPLEX_IS_NAN guards (#162537) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162537 Approved by: https://github.com/anijain2305, https://github.com/mlazos ghstack dependencies: #162528 --- torch/_C/_dynamo/guards.pyi | 8 ++++++ torch/_dynamo/guards.py | 14 +++------- torch/csrc/dynamo/guards.cpp | 53 ++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 10 deletions(-) diff --git a/torch/_C/_dynamo/guards.pyi b/torch/_C/_dynamo/guards.pyi index 547ca964d09f2..591d471194997 100644 --- a/torch/_C/_dynamo/guards.pyi +++ b/torch/_C/_dynamo/guards.pyi @@ -325,6 +325,14 @@ class GuardManager: level: int, verbose_code_parts: list[str], ) -> None: ... + def add_float_is_nan_guard( + self, + verbose_code_parts: list[str], + ) -> None: ... + def add_complex_is_nan_guard( + self, + verbose_code_parts: list[str], + ) -> None: ... 
def add_tuple_iterator_length_guard( self, length: int, diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index f75ef9b12b207..bf5ba4be4973c 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -2197,26 +2197,20 @@ def EQUALS_MATCH(self, guard: Guard, recompile_hint: Optional[str] = None) -> No # Special case for nan because float("nan") == float("nan") evaluates to False if istype(val, float) and math.isnan(val): - self.TYPE_MATCH(guard) - code = [] - code.append(f"__math_isnan({ref})") + code = [f"(type({ref}) is float and __math_isnan({ref}))"] self._set_guard_export_info(guard, code) - self.get_guard_manager(guard).add_lambda_guard_no_framelocals( - _get_closure_vars()["__math_isnan"], # type: ignore[arg-type] + self.get_guard_manager(guard).add_float_is_nan_guard( get_verbose_code_parts(code, guard), ) return # Python math library doesn't support complex nan, so we need to use numpy if istype(val, complex) and np.isnan(val): - self.TYPE_MATCH(guard) - code = [] - code.append(f"__numpy_isnan({ref})") + code = [f"(type({ref}) is complex and __numpy_isnan({ref}))"] self._set_guard_export_info(guard, code) - self.get_guard_manager(guard).add_lambda_guard_no_framelocals( - _get_closure_vars()["__numpy_isnan"], # type: ignore[arg-type] + self.get_guard_manager(guard).add_complex_is_nan_guard( get_verbose_code_parts(code, guard), ) return diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index 7434749ec1f98..b1c631f13f560 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -2254,6 +2254,39 @@ class SET_CONTAINS : public LeafGuard { py::object _item; }; +// Check if the float is nan +class FLOAT_IS_NAN : public LeafGuard { + public: + FLOAT_IS_NAN( + RootGuardManager* root_guard_manager, + py::object verbose_code_parts) + : LeafGuard(root_guard_manager, std::move(verbose_code_parts)) {} + + bool check_nopybind(PyObject* value) override { // borrowed ref + if (!PyFloat_CheckExact(value)) { + return false; + } + return std::isnan(PyFloat_AsDouble(value)); + } +}; + +// Check if the float is nan +class COMPLEX_IS_NAN : public LeafGuard { + public: + COMPLEX_IS_NAN( + RootGuardManager* root_guard_manager, + py::object verbose_code_parts) + : LeafGuard(root_guard_manager, std::move(verbose_code_parts)) {} + + bool check_nopybind(PyObject* value) override { // borrowed ref + if (!PyComplex_CheckExact(value)) { + return false; + } + Py_complex c_value = PyComplex_AsCComplex(value); + return std::isnan(c_value.real) || std::isnan(c_value.imag); + } +}; + // Check if the dual level is the same as the one in fx graph class DUAL_LEVEL_MATCH : public LeafGuard { public: @@ -6875,6 +6908,14 @@ PyObject* torch_c_dynamo_guards_init() { py_m, "DUAL_LEVEL_MATCH") .def(py::init()) .def("__call__", &DUAL_LEVEL_MATCH::check); + py::class_>( + py_m, "FLOAT_IS_NAN") + .def(py::init()) + .def("__call__", &FLOAT_IS_NAN::check); + py::class_>( + py_m, "COMPLEX_IS_NAN") + .def(py::init()) + .def("__call__", &COMPLEX_IS_NAN::check); py::class_>( py_m, "DYNAMIC_INDICES") .def(py::init()) @@ -7316,6 +7357,18 @@ PyObject* torch_c_dynamo_guards_init() { self.add_leaf_guard(std::make_shared( self.get_root(), level, std::move(verbose_code_parts))); }) + .def( + "add_float_is_nan_guard", + [](GuardManager& self, py::object verbose_code_parts) -> void { + self.add_leaf_guard(std::make_shared( + self.get_root(), std::move(verbose_code_parts))); + }) + .def( + "add_complex_is_nan_guard", + [](GuardManager& self, py::object verbose_code_parts) -> 
void { + self.add_leaf_guard(std::make_shared( + self.get_root(), std::move(verbose_code_parts))); + }) .def( "add_dynamic_indices_guard", [](GuardManager& self, From 561430edcdd12c06d1568201c3e433d57108e582 Mon Sep 17 00:00:00 2001 From: fduwjj Date: Thu, 11 Sep 2025 10:47:01 -0700 Subject: [PATCH 156/693] [CuTe] Add type for CuTe layout via claude (#162534) This PR mostly is a cosmetic change using Claude to add types for copied PyCute code. We removed all suppressions of linters and add type checker, type alias and mypy ignore(if needed) so that the pycute code will be checked by linter. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162534 Approved by: https://github.com/ezyang, https://github.com/Skylion007 ghstack dependencies: #162413 --- torch/distributed/_pycute/__init__.py | 45 +++++++- torch/distributed/_pycute/int_tuple.py | 83 ++++++++------ torch/distributed/_pycute/layout.py | 150 +++++++++++++++---------- torch/distributed/_pycute/typing.py | 7 +- 4 files changed, 178 insertions(+), 107 deletions(-) diff --git a/torch/distributed/_pycute/__init__.py b/torch/distributed/_pycute/__init__.py index ea1255591c55a..a3e611bbc712a 100644 --- a/torch/distributed/_pycute/__init__.py +++ b/torch/distributed/_pycute/__init__.py @@ -1,5 +1,3 @@ -# flake8: noqa -# ruff: noqa: PGH004, B011 ################################################################################################# # # Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. @@ -32,6 +30,43 @@ # ################################################################################################# -from .int_tuple import * -from .layout import * -from .typing import * +from .int_tuple import ( + crd2crd, + crd2idx, + elem_scale, + flatten, + has_none, + idx2crd, + inner_product, + IntTuple, + is_int, + is_tuple, + prefix_product, + product, + shape_div, + signum, + slice_, + tuple_max, +) +from .layout import ( + coalesce, + complement, + composition, + cosize, + filter, + is_layout, + Layout, + LayoutBase, + left_inverse, + logical_divide, + logical_product, + make_layout, + right_inverse, + size, + slice_and_offset, + tiled_divide, + tiled_product, + zipped_divide, + zipped_product, +) +from .typing import Integer diff --git a/torch/distributed/_pycute/int_tuple.py b/torch/distributed/_pycute/int_tuple.py index b86a5ff603fd1..1b13187e7dbf1 100644 --- a/torch/distributed/_pycute/int_tuple.py +++ b/torch/distributed/_pycute/int_tuple.py @@ -1,6 +1,3 @@ -# mypy: ignore-errors -# flake8: noqa -# ruff: noqa: PGH004, B011 ################################################################################################# # # Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
@@ -39,20 +36,25 @@ from functools import reduce from itertools import chain -from typing import Union +from typing import Optional, Union +from typing_extensions import TypeAlias, TypeIs from .typing import Integer -def is_int(x): +# Type aliases for better readability +IntTuple: TypeAlias = Union[int, tuple["IntTuple", ...]] + + +def is_int(x: object) -> TypeIs[int]: return isinstance(x, Integer) -def is_tuple(x): +def is_tuple(x: object) -> TypeIs[tuple]: return isinstance(x, tuple) -def flatten(t): +def flatten(t: IntTuple) -> tuple[int, ...]: if is_tuple(t): if len(t) == 0: return () @@ -62,40 +64,40 @@ def flatten(t): return (t,) -def signum(a): +def signum(a: int) -> int: return bool(a > 0) - bool(a < 0) -def product(a): +def product(a: IntTuple) -> int: if is_tuple(a): return reduce(lambda val, elem: val * product(elem), a, 1) else: return a -def inner_product(a, b): - if is_tuple(a): # tuple tuple +def inner_product(a: IntTuple, b: IntTuple) -> int: + if is_tuple(a) and is_tuple(b): # tuple tuple assert len(a) == len(b) return sum(inner_product(x, y) for x, y in zip(a, b)) else: # "int" "int" - assert not is_tuple(b) + assert not is_tuple(a) and not is_tuple(b) return a * b -def tuple_max(a): +def tuple_max(a: IntTuple) -> int: if is_tuple(a): return max(tuple_max(x) for x in a) else: return a -def elem_scale(a, b): +def elem_scale(a: IntTuple, b: IntTuple) -> IntTuple: if is_tuple(a): if is_tuple(b): # tuple tuple assert len(a) == len(b) return tuple(elem_scale(x, y) for x, y in zip(a, b)) else: # tuple "int" - assert False # Error + raise AssertionError("Invalid combination: tuple with int") else: if is_tuple(b): # "int" tuple return elem_scale(a, product(b)) @@ -104,7 +106,7 @@ def elem_scale(a, b): # Inclusive prefix ceil div with output congruent to input a -def shape_div(a, b): +def shape_div(a: IntTuple, b: IntTuple) -> IntTuple: if is_tuple(a): if is_tuple(b): # tuple tuple assert len(a) == len(b) @@ -125,7 +127,7 @@ def shape_div(a, b): # Exclusive prefix product with output congruent to input a -def prefix_product(a, init=1): +def prefix_product(a: IntTuple, init: IntTuple = 1) -> IntTuple: if is_tuple(a): if is_tuple(init): # tuple tuple assert len(a) == len(init) @@ -139,44 +141,49 @@ def prefix_product(a, init=1): return tuple(r) else: if is_tuple(init): # "int" tuple - assert False # Error + raise AssertionError("Invalid combination: int with tuple init") else: # "int" "int" return init -def idx2crd(idx, shape, stride=None): +def idx2crd( + idx: IntTuple, shape: IntTuple, stride: Optional[IntTuple] = None +) -> IntTuple: if stride is None: stride = prefix_product(shape) if is_tuple(idx): - if is_tuple(shape): # tuple tuple tuple - assert len(idx) == len(shape) and len(idx) == len(stride) + if is_tuple(shape) and is_tuple(stride): # tuple tuple tuple + assert len(idx) == len(shape) and len(stride) == len(shape) return tuple(idx2crd(i, s, d) for i, s, d in zip(idx, shape, stride)) else: # tuple "int" "int" - assert False # Error + raise AssertionError("Invalid combination: tuple with int stride") else: - if is_tuple(shape): # "int" tuple tuple + if is_tuple(shape) and is_tuple(stride): # "int" tuple tuple assert len(shape) == len(stride) return tuple(idx2crd(idx, s, d) for s, d in zip(shape, stride)) else: # "int" "int" "int" - return (idx // stride) % shape + assert not is_tuple(shape) and not is_tuple(stride) + return (idx // stride) % shape # all are ints after type checks -def crd2idx(crd, shape, stride=None): +def crd2idx( + crd: Optional[IntTuple], shape: 
IntTuple, stride: Optional[IntTuple] = None +) -> int: if stride is None: stride = prefix_product(shape) if is_tuple(crd): - if is_tuple(shape): # tuple tuple tuple - assert len(crd) == len(shape) and len(crd) == len(stride) + if is_tuple(shape) and is_tuple(stride): # tuple tuple tuple + assert len(crd) == len(shape) and len(stride) == len(shape) return sum(crd2idx(c, s, d) for c, s, d in zip(crd, shape, stride)) else: # tuple "int" "int" - assert False, f"crd={crd}, shape={shape}" # Error + raise AssertionError(f"Invalid combination: crd={crd}, shape={shape}") else: if crd is None: crd = 0 - if is_tuple(shape): # "int" tuple tuple + if is_tuple(shape) and is_tuple(stride): # "int" tuple tuple assert len(shape) == len(stride) result = 0 for i in range(len(shape) - 1): @@ -184,11 +191,14 @@ def crd2idx(crd, shape, stride=None): crd = crd // product(shape[i]) return result + crd2idx(crd, shape[-1], stride[-1]) else: # "int" "int" "int" - return crd * stride + assert not is_tuple(shape) and not is_tuple(stride) + return crd * stride # all are ints after type checks # Transform crd into the dst_shape's iteration space -def crd2crd(crd, dst_shape, src_shape=None): +def crd2crd( + crd: IntTuple, dst_shape: IntTuple, src_shape: Optional[IntTuple] = None +) -> IntTuple: if is_tuple(crd): if is_tuple(dst_shape): # tuple tuple assert len(crd) == len(dst_shape) @@ -206,18 +216,21 @@ def crd2crd(crd, dst_shape, src_shape=None): # Filter trg according to crd: keep only elements of trg that are paired with None -def slice_(crd: Union[None, tuple, int], trg: Union[tuple, int]): +def slice_(crd: Union[None, tuple, int], trg: Union[tuple, int]) -> Union[tuple, int]: if is_tuple(crd): if is_tuple(trg): # tuple tuple assert len(crd) == len(trg) # match C++ behavior of `filter_tuple` using `tuple_cat(...)` return tuple( chain( - *filter(lambda x: x != (), [slice_(c, s) for c, s in zip(crd, trg)]) + *filter( # type: ignore[arg-type] # filter returns Iterator which is compatible + lambda x: x != (), + [slice_(c, s) for c, s in zip(crd, trg)], + ) ) ) else: - assert False # tuple "int" : Error + raise AssertionError("Invalid combination: tuple crd with int trg") elif crd is None: # match C++ behavior `return cute::tuple{b};` return (trg,) @@ -226,7 +239,7 @@ def slice_(crd: Union[None, tuple, int], trg: Union[tuple, int]): # Determine if None appears at any of an int_tuples' terminals -def has_none(a: Union[None, tuple, int]): +def has_none(a: Union[None, tuple, int]) -> bool: if is_tuple(a): return any(has_none(v) for v in a) else: diff --git a/torch/distributed/_pycute/layout.py b/torch/distributed/_pycute/layout.py index 67846b56f6e91..8257bf5049ff8 100644 --- a/torch/distributed/_pycute/layout.py +++ b/torch/distributed/_pycute/layout.py @@ -1,6 +1,3 @@ -# mypy: ignore-errors -# flake8: noqa -# ruff: noqa: PGH004, B011 ################################################################################################# # # Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
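# [Editor's aside, illustrative sketch only — not part of the patch.] Worked
# example of the indexing convention shared by the typed int_tuple helpers
# above and the Layout class below: with no explicit stride, prefix_product
# computes an exclusive prefix product of the shape, so the left-most mode
# varies fastest and crd2idx is the dot product of coordinate and stride.
# Assumes the module layout introduced in this patch (torch.distributed._pycute).
from torch.distributed._pycute import crd2idx, prefix_product

shape = (3, 4)
assert prefix_product(shape) == (1, 3)                   # exclusive prefix product of (3, 4)
assert crd2idx((1, 2), shape, (1, 3)) == 1 * 1 + 2 * 3   # explicit stride -> index 7
assert crd2idx((1, 2), shape) == 7                       # default stride gives the same index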
@@ -38,20 +35,41 @@ """ from itertools import chain - -from .int_tuple import * +from typing import Optional, Union +from typing_extensions import TypeAlias, TypeIs + +from .int_tuple import ( + crd2idx, + flatten, + has_none, + IntTuple, + is_int, + is_tuple, + prefix_product, + product, + slice_, +) + + +# Type aliases +LayoutOrIntTuple: TypeAlias = Union["Layout", IntTuple] +LayoutProfile: TypeAlias = Optional[Union[tuple[object, ...], "Layout"]] +LayoutInput: TypeAlias = Optional[Union["Layout", IntTuple, tuple[object, ...]]] +CoordinateType: TypeAlias = Optional[ + Union[int, IntTuple, tuple[object, ...]] +] # Input for slice_ and crd2idx functions class LayoutBase: pass -def is_layout(x): +def is_layout(x: object) -> TypeIs["Layout"]: return isinstance(x, LayoutBase) class Layout(LayoutBase): - def __init__(self, _shape, _stride=None): + def __init__(self, _shape: IntTuple, _stride: Optional[IntTuple] = None) -> None: self.shape = _shape if _stride is None: self.stride = prefix_product(self.shape) @@ -59,18 +77,20 @@ def __init__(self, _shape, _stride=None): self.stride = _stride # operator == - def __eq__(self, other): + def __eq__(self, other: object) -> bool: + if not isinstance(other, Layout): + return False return self.shape == other.shape and self.stride == other.stride # operator len(L) (len [rank] like tuples) - def __len__(self): + def __len__(self) -> int: if is_tuple(self.shape): return len(self.shape) else: return 1 # operator () (map coord to idx) - def __call__(self, *args): + def __call__(self, *args: CoordinateType) -> Union["Layout", int]: """ Map a logical coordinate to a linear index (Coord has no Underscore slice operators) OR @@ -85,63 +105,63 @@ def __call__(self, *args): return Layout(slice_(args, self.shape), slice_(args, self.stride)) else: if len(args) == 1: - return crd2idx(args[0], self.shape, self.stride) + return crd2idx(args[0], self.shape, self.stride) # type: ignore[arg-type] else: - return crd2idx(args, self.shape, self.stride) + return crd2idx(args, self.shape, self.stride) # type: ignore[arg-type] # operator [] (get-i like tuples) - def __getitem__(self, i): + def __getitem__(self, i: int) -> "Layout": if is_tuple(self.shape): - return Layout(self.shape[i], self.stride[i]) + return Layout(self.shape[i], self.stride[i]) # type: ignore[index] else: assert i == 0 return Layout(self.shape, self.stride) # size(layout) Size of the domain - def size(self): + def size(self) -> int: return product(self.shape) # cosize(layout) Size of the codomain - def cosize(self): - return self(self.size() - 1) + 1 + def cosize(self) -> int: + return self(self.size() - 1) + 1 # type: ignore[operator] # print and str - def __str__(self): + def __str__(self) -> str: return f"{self.shape}:{self.stride}" # error msgs and representation - def __repr__(self): + def __repr__(self) -> str: return f"Layout({self.shape},{self.stride})" # Make Layout from a list of layouts (each layout it's own mode in the result) -def make_layout(*layouts): +def make_layout(*layouts: Union[Layout, tuple[Layout, ...]]) -> Layout: if len(layouts) == 1 and not is_layout(layouts[0]): layouts = layouts[0] - shape, stride = zip(*((a.shape, a.stride) for a in layouts)) + shape, stride = zip(*((a.shape, a.stride) for a in layouts)) # type: ignore[union-attr] return Layout(shape, stride) # Size of the domain -def size(layout): +def size(layout: LayoutOrIntTuple) -> int: if is_layout(layout): return layout.size() return product(layout) # Size of the codomain -def cosize(layout): +def cosize(layout: Layout) 
-> int: return layout.cosize() # Layout coalesce -- flatten and combine as many modes as possible while preserving the int-to-int function -def coalesce(layout, profile=None): +def coalesce(layout: Layout, profile: LayoutProfile = None) -> Layout: if is_tuple(profile): assert len(layout) >= len(profile) return make_layout( chain( - (coalesce(layout[i], profile[i]) for i in range(0, len(profile))), + (coalesce(layout[i], profile[i]) for i in range(0, len(profile))), # type: ignore[arg-type] (layout[i] for i in range(len(profile), len(layout))), ) ) @@ -171,12 +191,12 @@ def coalesce(layout, profile=None): # Layout filter -- replace all stride-0 modes with size-1 and then coalesce to remove them -def filter(layout, profile=None): +def filter(layout: Layout, profile: LayoutProfile = None) -> Layout: if is_tuple(profile): assert len(layout) >= len(profile) return make_layout( chain( - (filter(layout[i], profile[i]) for i in range(0, len(profile))), + (filter(layout[i], profile[i]) for i in range(0, len(profile))), # type: ignore[arg-type] (layout[i] for i in range(len(profile), len(layout))), ) ) @@ -197,7 +217,7 @@ def filter(layout, profile=None): # Layout composition # Use tuples-of-layouts to perform this operation by-mode and None as no-op -def composition(layoutA, layoutB): +def composition(layoutA: Layout, layoutB: LayoutInput) -> Layout: if layoutB is None: return layoutA elif is_int(layoutB): @@ -206,12 +226,12 @@ def composition(layoutA, layoutB): assert len(layoutA) >= len(layoutB) return make_layout( chain( - (composition(layoutA[i], layoutB[i]) for i in range(0, len(layoutB))), + (composition(layoutA[i], layoutB[i]) for i in range(0, len(layoutB))), # type: ignore[arg-type] (layoutA[i] for i in range(len(layoutB), len(layoutA))), ) ) elif is_tuple(layoutB.shape): - return make_layout(composition(layoutA, layoutB_i) for layoutB_i in layoutB) + return make_layout(composition(layoutA, layoutB_i) for layoutB_i in layoutB) # type: ignore[arg-type, attr-defined] if layoutB.stride == 0: return Layout(layoutB.shape, 0) @@ -224,16 +244,16 @@ def composition(layoutA, layoutB): for curr_shape, curr_stride in zip( flatten(flat_A.shape)[:-1], flatten(flat_A.stride)[:-1] ): - assert curr_shape % rest_stride == 0 or rest_stride % curr_shape == 0 - new_shape = min(max(1, curr_shape // rest_stride), rest_shape) + assert curr_shape % rest_stride == 0 or rest_stride % curr_shape == 0 # type: ignore[operator] + new_shape = min(max(1, curr_shape // rest_stride), rest_shape) # type: ignore[operator] if new_shape != 1: result_shape.append(new_shape) result_stride.append(rest_stride * curr_stride) - rest_shape = rest_shape // new_shape + rest_shape = rest_shape // new_shape # type: ignore[operator] rest_stride = -( - -rest_stride // curr_shape + -rest_stride // curr_shape # type: ignore[operator] ) # Python exclusive impl: "//" is always floor div so == ceil_div(abs(rest_stride), curr_shape) * signum(rest_stride) if rest_shape != 1 or len(result_shape) == 0: @@ -241,13 +261,13 @@ def composition(layoutA, layoutB): result_stride.append(rest_stride * flatten(flat_A.stride)[-1]) if len(result_shape) == 1: - return Layout(result_shape[0], result_stride[0]) + return Layout(result_shape[0], result_stride[0]) # type: ignore[arg-type] else: - return Layout(tuple(result_shape), tuple(result_stride)) + return Layout(tuple(result_shape), tuple(result_stride)) # type: ignore[arg-type] # Layout complement -def complement(layout, max_idx=1): +def complement(layout: LayoutOrIntTuple, max_idx: int = 1) -> Layout: if 
is_int(layout): return complement(Layout(layout)) @@ -255,7 +275,7 @@ def complement(layout, max_idx=1): result_stride = [] current_idx = 1 - sorted_DS = sorted(zip(flatten(layout.stride), flatten(layout.shape))) + sorted_DS = sorted(zip(flatten(layout.stride), flatten(layout.shape))) # type: ignore[union-attr] for stride, shape in sorted_DS: if stride == 0 or shape == 1: continue @@ -275,7 +295,7 @@ def complement(layout, max_idx=1): # Layout right inverse -def right_inverse(layout): +def right_inverse(layout: Optional[LayoutOrIntTuple]) -> Optional[Layout]: if layout is None: return None elif is_int(layout): @@ -285,9 +305,9 @@ def right_inverse(layout): result_stride = [] current_idx = 1 - flat_shape = flatten(layout.shape) - flat_stride = flatten(layout.stride) - sorted_DSA = sorted(zip(flat_stride, flat_shape, prefix_product(flat_shape))) + flat_shape = flatten(layout.shape) # type: ignore[union-attr] + flat_stride = flatten(layout.stride) # type: ignore[union-attr] + sorted_DSA = sorted(zip(flat_stride, flat_shape, prefix_product(flat_shape))) # type: ignore[arg-type] for stride, shape, rstride in sorted_DSA: if shape == 1: continue @@ -302,17 +322,17 @@ def right_inverse(layout): # Layout left inverse -def left_inverse(layout): +def left_inverse(layout: Optional[LayoutOrIntTuple]) -> Optional[Layout]: if layout is None: return None elif is_int(layout): return Layout(layout) - return right_inverse(make_layout(layout, complement(layout))) + return right_inverse(make_layout(layout, complement(layout))) # type: ignore[arg-type] # Split a layout by the composition of B and the "rest" # Use tuples-of-layouts to perform this operation by-mode and None as no-op -def logical_divide(layoutA, layoutB): +def logical_divide(layoutA: Layout, layoutB: LayoutInput) -> Layout: if layoutB is None: return layoutA elif is_int(layoutB): @@ -322,7 +342,7 @@ def logical_divide(layoutA, layoutB): return make_layout( chain( ( - logical_divide(layoutA[i], layoutB[i]) + logical_divide(layoutA[i], layoutB[i]) # type: ignore[arg-type] for i in range(0, len(layoutB)) ), (layoutA[i] for i in range(len(layoutB), len(layoutA))), @@ -330,13 +350,14 @@ def logical_divide(layoutA, layoutB): ) return composition( - layoutA, make_layout(layoutB, complement(layoutB, size(layoutA))) + layoutA, + make_layout(layoutB, complement(layoutB, size(layoutA))), ) # Reproduce a layoutA over a layoutB # Use tuples-of-layouts to perform this operation by-mode and None as no-op -def logical_product(layoutA, layoutB): +def logical_product(layoutA: Layout, layoutB: LayoutInput) -> Layout: if layoutB is None: return layoutA elif is_int(layoutB): @@ -346,7 +367,7 @@ def logical_product(layoutA, layoutB): return make_layout( chain( ( - logical_product(layoutA[i], layoutB[i]) + logical_product(layoutA[i], layoutB[i]) # type: ignore[arg-type] for i in range(0, len(layoutB)) ), (layoutA[i] for i in range(len(layoutB), len(layoutA))), @@ -360,20 +381,25 @@ def logical_product(layoutA, layoutB): # Gather the modes from a hierarchical logical_divide or logical_product -def hier_unzip(splitter, layoutA, layoutB): +def hier_unzip( + splitter: object, + layoutA: Layout, + layoutB: LayoutInput, +) -> Layout: if layoutB is None: return make_layout(Layout(1, 0), layoutA) elif is_tuple(layoutB): assert len(layoutA) >= len(layoutB) # A layout with shape ((A,a),(B,b),(C,c)) split = make_layout( - hier_unzip(splitter, layoutA[i], layoutB[i]) for i in range(0, len(layoutB)) + hier_unzip(splitter, layoutA[i], layoutB[i]) # type: ignore[arg-type] + for i in 
range(0, len(layoutB)) ) # Gather to shape ((A,B,C,...),(a,b,c,...,y,z)) return make_layout( - make_layout(split[i][0] for i in range(0, len(layoutB))), + make_layout(split[i][0] for i in range(0, len(layoutB))), # type: ignore[arg-type] make_layout( - chain( + chain( # type: ignore[arg-type] (split[i][1] for i in range(0, len(layoutB))), (layoutA[i] for i in range(len(layoutB), len(layoutA))), ) @@ -381,33 +407,33 @@ def hier_unzip(splitter, layoutA, layoutB): ) # splitter must return a rank-2 layout - return splitter(layoutA, layoutB) + return splitter(layoutA, layoutB) # type: ignore[operator] # Apply logical divide hierarchically and gather the split modes into two modes -def zipped_divide(layoutA, layoutB): +def zipped_divide(layoutA: Layout, layoutB: LayoutInput) -> Layout: return hier_unzip(logical_divide, layoutA, layoutB) # Perform logical divide hierarchically and gather tiles (B-layouts) into a new mode -def tiled_divide(layoutA, layoutB): +def tiled_divide(layoutA: Layout, layoutB: LayoutInput) -> Layout: result = zipped_divide(layoutA, layoutB) - return make_layout([result[0]] + [result[1][i] for i in range(len(result[1]))]) + return make_layout([result[0]] + [result[1][i] for i in range(len(result[1]))]) # type: ignore[arg-type] # Apply logical product hierarchically and gather the split modes into two modes -def zipped_product(layoutA, layoutB): +def zipped_product(layoutA: Layout, layoutB: LayoutInput) -> Layout: return hier_unzip(logical_product, layoutA, layoutB) # Perform logical product hierarchically and gather tiles (B-layouts) into a new mode -def tiled_product(layoutA, layoutB): +def tiled_product(layoutA: Layout, layoutB: LayoutInput) -> Layout: result = zipped_product(layoutA, layoutB) - return make_layout([result[0]] + [result[1][i] for i in range(len(result[1]))]) + return make_layout([result[0]] + [result[1][i] for i in range(len(result[1]))]) # type: ignore[arg-type] -def slice_and_offset(crd: tuple, layout: Layout): +def slice_and_offset(crd: tuple[object, ...], layout: Layout) -> tuple[Layout, int]: return ( Layout(slice_(crd, layout.shape), slice_(crd, layout.stride)), - crd2idx(crd, layout.shape, layout.stride), + crd2idx(crd, layout.shape, layout.stride), # type: ignore[arg-type] ) diff --git a/torch/distributed/_pycute/typing.py b/torch/distributed/_pycute/typing.py index c45ac197b8ae6..5e6fe0a9c66e8 100644 --- a/torch/distributed/_pycute/typing.py +++ b/torch/distributed/_pycute/typing.py @@ -1,6 +1,3 @@ -# mypy: ignore-errors -# flake8: noqa -# ruff: noqa: PGH004, B011 ################################################################################################# # # Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. @@ -36,9 +33,9 @@ from abc import ABC -class Integer(ABC): +class Integer(ABC): # noqa: B024 # Uses __subclasshook__ instead of abstract methods @classmethod - def __subclasshook__(cls, c): + def __subclasshook__(cls, c: type) -> bool: if c in [bool, float]: return False From 0babdfad63a3ab3965f501de8bed7b87999ce7fe Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 12 Sep 2025 05:52:04 +0000 Subject: [PATCH 157/693] [1/N] Port 6 fsdp distributed test cases to Intel GPU (#160158) For https://github.com/pytorch/pytorch/issues/114850, we will port distributed tests to Intel GPU. 
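(Editor's aside, illustrative only and not part of the patch: the heart of the port is resolving the accelerator backend once and reusing it wherever a hard-coded "cuda" used to appear. A minimal sketch, assuming a PyTorch build that exposes the torch.accelerator API used throughout the diffs below:

    import torch
    import torch.nn as nn

    device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu"

    model = nn.Linear(4, 4).to(device_type)      # replaces .cuda()
    x = torch.rand(8, 4, device=device_type)     # replaces device="cuda"
    print(model(x).device.type)                  # "cuda", "xpu", "hpu" or "cpu"

The ported test classes are then registered for all GPU backends via instantiate_device_type_tests(TestClass, globals(), only_for=("cuda", "hpu", "xpu"), allow_xpu=True), as the diffs below show.)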
We could enable Intel GPU with following methods and try the best to keep the original code styles: - Instantiate_device_type_tests() - Use "torch.accelerator.current_accelerator()" to determine the accelerator backend - Enabled XPU for some test path - Added allow_xpu=True for supported test class Pull Request resolved: https://github.com/pytorch/pytorch/pull/160158 Approved by: https://github.com/guangyey, https://github.com/d4l3k --- test/distributed/fsdp/test_fsdp_overlap.py | 22 +++-- test/distributed/fsdp/test_fsdp_pure_fp16.py | 2 +- .../fsdp/test_fsdp_sharded_grad_scaler.py | 33 +++++--- test/distributed/fsdp/test_fsdp_state_dict.py | 83 ++++++++++--------- .../fsdp/test_fsdp_tp_integration.py | 28 ++++--- test/distributed/fsdp/test_fsdp_traversal.py | 8 +- 6 files changed, 103 insertions(+), 73 deletions(-) diff --git a/test/distributed/fsdp/test_fsdp_overlap.py b/test/distributed/fsdp/test_fsdp_overlap.py index d076563750e63..f9a43e748c464 100644 --- a/test/distributed/fsdp/test_fsdp_overlap.py +++ b/test/distributed/fsdp/test_fsdp_overlap.py @@ -8,8 +8,7 @@ import torch import torch.nn as nn -from torch import distributed as dist -from torch.cuda import Event +from torch import distributed as dist, Event from torch.distributed.fsdp import FullyShardedDataParallel as FSDP from torch.testing._internal.common_device_type import instantiate_device_type_tests from torch.testing._internal.common_distributed import skip_if_lt_x_gpu @@ -19,6 +18,8 @@ run_tests, TEST_HPU, TEST_WITH_DEV_DBG_ASAN, + TEST_XPU, + xfailIf, ) @@ -33,6 +34,8 @@ ) sys.exit(0) +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + class Layer(nn.Module): def __init__(self, compute_cycles, has_params: bool): @@ -50,7 +53,8 @@ def forward(self, x): # Record the fake forward compute time. self.e1.record() if self.sleep_cycles > 0: - torch.cuda._sleep(self.sleep_cycles) + if torch.cuda.is_available(): + torch.cuda._sleep(self.sleep_cycles) if self.optional_param is not None: x = x + self.optional_param # force the param to be part of the graph self.e2.record() @@ -72,7 +76,7 @@ def _create_model(compute_cycles, has_params: bool): FSDP(Layer(compute_cycles, has_params), limit_all_gathers=False), ), limit_all_gathers=False, - ).cuda() + ).to(device_type) return model @@ -110,7 +114,7 @@ def run(compute_cycles, all_gather_cycles): # Get the input and sets the input's requires_grad to True because # we have a fake compute in the forward pass. 
- batch = torch.rand(1).cuda() + batch = torch.rand(1).to(device_type) batch.requires_grad = True # Run one dummy iteration to trigger the execution order validation @@ -137,7 +141,8 @@ def run(compute_cycles, all_gather_cycles): def _delayed_all_gather(*args, **kwargs): nonlocal all_gather_called all_gather_called = True - torch.cuda._sleep(all_gather_cycles) + if torch.cuda.is_available(): + torch.cuda._sleep(all_gather_cycles) assert orig_all_gather return orig_all_gather(*args, **kwargs) @@ -245,6 +250,7 @@ def _delayed_all_gather(*args, **kwargs): self.assertTrue(compute_only + all_gather_only > 1.1 * both) @unittest.skipIf(TEST_HPU, "HPU doesn't has HW sleep API support, skipping") + @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1504 @skip_if_lt_x_gpu(2) def test_forward_overlap(self): self._dist_train() @@ -256,9 +262,9 @@ def world_size(self): return 2 -devices = ("cuda", "hpu") +devices = ("cuda", "hpu", "xpu") instantiate_device_type_tests( - TestForwardOverlapWorldSizeOne, globals(), only_for=devices + TestForwardOverlapWorldSizeOne, globals(), only_for=devices, allow_xpu=True ) if __name__ == "__main__": run_tests() diff --git a/test/distributed/fsdp/test_fsdp_pure_fp16.py b/test/distributed/fsdp/test_fsdp_pure_fp16.py index 9ec55f22c54b2..20c2f927651f6 100644 --- a/test/distributed/fsdp/test_fsdp_pure_fp16.py +++ b/test/distributed/fsdp/test_fsdp_pure_fp16.py @@ -152,6 +152,6 @@ def _test_fp16_dtypes( devices = ("cuda", "hpu", "xpu") -instantiate_device_type_tests(TestPureFP16, globals(), only_for=devices) +instantiate_device_type_tests(TestPureFP16, globals(), only_for=devices, allow_xpu=True) if __name__ == "__main__": run_tests() diff --git a/test/distributed/fsdp/test_fsdp_sharded_grad_scaler.py b/test/distributed/fsdp/test_fsdp_sharded_grad_scaler.py index 047972252fc6a..6a7d533396189 100644 --- a/test/distributed/fsdp/test_fsdp_sharded_grad_scaler.py +++ b/test/distributed/fsdp/test_fsdp_sharded_grad_scaler.py @@ -35,6 +35,7 @@ parametrize, run_tests, TEST_WITH_DEV_DBG_ASAN, + TEST_XPU, TestCase, ) @@ -51,6 +52,8 @@ ) sys.exit(0) +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + params = "cpu_offload,sharding_strategy,mixed_precision,use_orig_params" cpu_offload_config = [CPUOffload(offload_params=True), CPUOffload(offload_params=False)] @@ -76,11 +79,14 @@ class TestShardGradScaler(TestCase): @unittest.skipIf( - amp_definitely_not_available(), "no supported device (cuda, xla) found" + amp_definitely_not_available() and not TEST_XPU, + "no supported device (cuda, xla, xpu) found", ) def test_grad_scaling(self): pg = DummyProcessGroup(0, 1) - scaler = ShardedGradScaler(init_scale=2.0, process_group=pg, enabled=True) + scaler = ShardedGradScaler( + device=device_type, init_scale=2.0, process_group=pg, enabled=True + ) t0 = torch.full((1,), 4.0, dtype=torch.float32, device="cpu") t1 = torch.full((1,), 8.0, dtype=torch.float32, device="cpu") outputs = [t1.clone(), (t0.clone(), t1.clone()), [t0.clone(), t1.clone()]] @@ -92,11 +98,14 @@ def test_grad_scaling(self): self.assertTrue(scaler._scale.device == t1.device) @unittest.skipIf( - amp_definitely_not_available(), "no supported device (cuda, xla) found" + amp_definitely_not_available() and not TEST_XPU, + "no supported device (cuda, xla, xpu) found", ) def test_scaling_unscaling_sparse(self): pg = DummyProcessGroup(0, 1) - scaler = ShardedGradScaler(init_scale=2.0, process_group=pg, enabled=True) + scaler = ShardedGradScaler( + device=device_type, init_scale=2.0, 
process_group=pg, enabled=True + ) inv_scale = torch.full((1,), 0.5, dtype=torch.float, device="cpu") found_inf = torch.full((1,), 0, dtype=torch.float, device="cpu") @@ -137,11 +146,14 @@ def test_scaling_unscaling_sparse(self): self.assertEqual(found_inf, 1.0) @unittest.skipIf( - amp_definitely_not_available(), "no supported device (cuda, xla) found" + amp_definitely_not_available() and not TEST_XPU, + "no supported device (cuda, xla, xpu) found", ) def test_inf_gradients_skip_optim_step(self): pg = DummyProcessGroup(0, 1) - scaler = ShardedGradScaler(init_scale=2.0, process_group=pg, enabled=True) + scaler = ShardedGradScaler( + device=device_type, init_scale=2.0, process_group=pg, enabled=True + ) loss = torch.full((1,), 4.0, dtype=torch.float32, device="cpu") t0 = torch.tensor([float("inf")], dtype=torch.float32, device="cpu") t0.grad = t0.clone() @@ -228,8 +240,9 @@ def _build_model_and_optim( { TransformerEncoderLayer, TransformerDecoderLayer, - } + }, ), + "device_id": self.rank, } model = FSDP(model, **fsdp_kwargs) optim = torch.optim.Adam(model.parameters(), lr=1e-2) @@ -257,10 +270,10 @@ def _test_sharded_grad_scaler_found_inf( cpu_offload=cpu_offload, use_orig_params=use_orig_params, ) - grad_scaler = ShardedGradScaler(init_scale=2.0) - ref_grad_scaler = torch.amp.GradScaler(device="cuda", init_scale=2.0) + grad_scaler = ShardedGradScaler(device=device_type, init_scale=2.0) + ref_grad_scaler = torch.amp.GradScaler(device=device_type, init_scale=2.0) scaled_losses: list[torch.Tensor] = [] - device = torch.device("cuda") + device = torch.device(device_type) torch.manual_seed(42 + self.rank + 1) for iter in range(10): diff --git a/test/distributed/fsdp/test_fsdp_state_dict.py b/test/distributed/fsdp/test_fsdp_state_dict.py index b76bbfd8b91f7..b0677655186a6 100644 --- a/test/distributed/fsdp/test_fsdp_state_dict.py +++ b/test/distributed/fsdp/test_fsdp_state_dict.py @@ -97,6 +97,8 @@ "sharded_state_dict": StateDictType.SHARDED_STATE_DICT, } +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + class Model(Module): def __init__( @@ -155,13 +157,13 @@ def forward(self, x): return self.net3(self.net2(self.net1(x))) def get_input(self): - return torch.rand(8, 8, device="cuda") + return torch.rand(8, 8, device=device_type) class TestFSDPStateDict(FSDPTest): @property def world_size(self): - return min(torch.cuda.device_count(), 2) + return min(torch.accelerator.device_count(), 2) def _broadcast_state_dict(self, state_dict): return _broadcast_state_dict(self.rank, state_dict) @@ -196,8 +198,8 @@ def _get_simple_nested_model( self, *fsdp_args, wrap=True, checkpoint_wrap=False, **fsdp_kwargs ): if wrap: - lin1 = nn.Linear(10, 10, bias=False).cuda() - lin2 = nn.Linear(10, 10, bias=False).cuda() + lin1 = nn.Linear(10, 10, bias=False).to(device_type) + lin2 = nn.Linear(10, 10, bias=False).to(device_type) if checkpoint_wrap: lin1 = checkpoint_wrapper(lin1) lin2 = checkpoint_wrapper(lin2) @@ -207,13 +209,13 @@ def _get_simple_nested_model( model = FSDP(seq, *fsdp_args, **fsdp_kwargs) else: model = nn.Sequential( - nn.Linear(10, 10, bias=False).cuda(), - nn.Linear(10, 10, bias=False).cuda(), + nn.Linear(10, 10, bias=False).to(device_type), + nn.Linear(10, 10, bias=False).to(device_type), ) return model def _get_simple_model(self, *fsdp_args, checkpoint_wrap=False, **fsdp_kwargs): - lin = nn.Linear(10, 10, bias=False).cuda() + lin = nn.Linear(10, 10, bias=False).to(device_type) if checkpoint_wrap: lin = checkpoint_wrapper(lin) model = FSDP(lin, *fsdp_args, 
**fsdp_kwargs) @@ -230,9 +232,9 @@ def _get_multibuffer_nested_model( else None ) if wrap: - lin1 = nn.Linear(10, 10, bias=False).cuda() - bn1 = nn.BatchNorm1d(10).cuda() - lin2 = nn.Linear(10, 10, bias=False).cuda() + lin1 = nn.Linear(10, 10, bias=False).to(device_type) + bn1 = nn.BatchNorm1d(10).to(device_type) + lin2 = nn.Linear(10, 10, bias=False).to(device_type) if checkpoint_wrap: lin1 = checkpoint_wrapper(lin1) bn1 = checkpoint_wrapper(bn1) @@ -247,9 +249,9 @@ def _get_multibuffer_nested_model( model = FSDP(seq, *fsdp_args, **fsdp_kwargs) else: model = nn.Sequential( - nn.Linear(10, 10, bias=False).cuda(), - nn.BatchNorm1d(10).cuda(), - nn.Linear(10, 10, bias=False).cuda(), + nn.Linear(10, 10, bias=False).to(device_type), + nn.BatchNorm1d(10).to(device_type), + nn.Linear(10, 10, bias=False).to(device_type), ) return model @@ -257,7 +259,7 @@ def _get_non_fsdp_root_module(self, *fsdp_args, wrap=True, **fsdp_kwargs): class FSDPContainer(nn.Module): def __init__(self, fsdp_1, fsdp_2): super().__init__() - self.non_fsdp_lin = nn.Linear(10, 10, bias=False).cuda() + self.non_fsdp_lin = nn.Linear(10, 10, bias=False).to(device_type) self.fsdp_1 = fsdp_1 self.fsdp_2 = fsdp_2 @@ -505,7 +507,7 @@ def test_state_dict_rank0_offload_save_load_flow(self, use_orig_params: bool): # Broadcast the module states from rank 0 with `sync_module_states=True` new_fsdp_model = FSDP( new_model, - device_id=torch.cuda.current_device(), + device_id=torch.accelerator.current_device_index(), auto_wrap_policy=auto_wrap_policy, sync_module_states=True, ) @@ -602,7 +604,7 @@ def test_basic_save_and_load_state_dict( model_new = model_call() if not cpu_offload.offload_params: - model_new = model_new.cuda() + model_new = model_new.to(device_type) if fp16: model_new.half() # Run a forward/backward to compute gradients to test the case @@ -677,7 +679,7 @@ def test_buffers_save_and_load_state_dict( model_new = model_call() if not cpu_offload.offload_params: - model_new = model_new.cuda() + model_new = model_new.to(device_type) # zero the model to ensure parameters are different. 
_zero_model(model_new, zero_buffers=True) @@ -704,7 +706,7 @@ def test_save_and_load_after_forward_state_dict( """ if state_dict_rank0_and_offload and state_dict_type != "state_dict": return - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) mixed_precision = ( MixedPrecision( param_dtype=torch.float16, @@ -718,7 +720,7 @@ def test_save_and_load_after_forward_state_dict( optim = torch.optim.SGD(model.parameters(), lr=0.1) initial_params = get_full_params(model) for _ in range(6): - inp = torch.randn(1, 10, device=torch.cuda.current_device()) + inp = torch.randn(1, 10, device=torch.accelerator.current_device_index()) output = model(*inp) loss = output.sum() expected_dtype = torch.float32 if mixed_precision is None else torch.float16 @@ -768,7 +770,7 @@ def _initialize_model( # keep everything deterministic for input data torch.manual_seed(0) - model = Model(wrap_fsdp, register_buffers=register_buffers).cuda() + model = Model(wrap_fsdp, register_buffers=register_buffers).to(device_type) if wrap_fsdp: model = FSDP(model) elif wrap_ddp: @@ -804,7 +806,9 @@ def _dist_train( model = self._initialize_model(wrap_fsdp) optim = SGD(model.parameters(), lr=0.1) - in_data = torch.rand(64, 4, requires_grad=True, device=torch.device("cuda")) + in_data = torch.rand( + 64, 4, requires_grad=True, device=torch.device(device_type) + ) for _ in range(3): out = model(in_data) out.sum().backward() @@ -812,7 +816,7 @@ def _dist_train( optim.zero_grad() if wrap_fsdp: - blank_model = FSDP(Model(True).cuda()) + blank_model = FSDP(Model(True).to(device_type)) _zero_model(blank_model) state_dict = self._state_dict(model, state_dict_type) if move_to_cpu: @@ -884,10 +888,12 @@ def test_state_dict_load_into_local_module( optim = SGD(model.parameters(), lr=0.1) if not fsdp_root: in_data = torch.randn( - 1, 10, requires_grad=True, device=torch.device("cuda") + 1, 10, requires_grad=True, device=torch.device(device_type) ) else: - in_data = torch.rand(64, 4, requires_grad=True, device=torch.device("cuda")) + in_data = torch.rand( + 64, 4, requires_grad=True, device=torch.device(device_type) + ) for _ in range(3): out = model(in_data) out.sum().backward() @@ -943,7 +949,7 @@ def test_state_dict_load_into_local_module( @parametrize("state_dict_type", _SUPPORTED_STATE_DICT_IMPLS) @parametrize("double_nest", [True]) def test_state_dict_skip_module(self, state_dict_type, double_nest): - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) def _create_module(wrap_fsdp=True): LINEAR_SKIP = "linear_skip" @@ -968,7 +974,7 @@ def _create_module(wrap_fsdp=True): fsdp, _ = _create_module() # Run a forward pass - inp = torch.randn((1, 10), device=torch.cuda.current_device()) + inp = torch.randn((1, 10), device=torch.accelerator.current_device_index()) loss = fsdp(inp) loss.sum().backward() @@ -1016,7 +1022,7 @@ def _create_module(wrap_fsdp=True): @skip_if_lt_x_gpu(2) def test_wrong_state_dict_config(self): - model = FSDP(Model(wrap_fsdp=True).cuda()) + model = FSDP(Model(wrap_fsdp=True).to(device_type)) with self.assertRaisesRegex(RuntimeError, "Expected state_dict_config of type"): with model.state_dict_type( model, StateDictType.FULL_STATE_DICT, LocalStateDictConfig() @@ -1038,7 +1044,7 @@ def test_state_dict_with_ignored_modules( register_buffers=True, ignore_inner=ignore_inner, mixed_precision=mixed_precision, - ).cuda() + ).to(device_type) ignored_modules = [model.outer] ignored_tensor_to_tensor_name = { model.outer.bias: "outer.bias", @@ -1097,7 +1103,7 @@ def 
test_state_dict_with_ignored_modules( self.assertEqual(sd1[prefixed_buffer_name].dtype, torch.float32) # Check that the state dict can be loaded into a non-wrapped version of # the model - nonwrapped_model = Model(wrap_fsdp=False, register_buffers=True).cuda() + nonwrapped_model = Model(wrap_fsdp=False, register_buffers=True).to(device_type) for param in nonwrapped_model.parameters(): with torch.no_grad(): param.zero_() @@ -1144,7 +1150,7 @@ def __init__(self) -> None: def forward(self, x): return self.my_parameter - model = FSDP(Model().cuda()) + model = FSDP(Model().to(device_type)) with FSDP.state_dict_type(model, StateDictType.LOCAL_STATE_DICT): out = model(None) out.backward() @@ -1153,7 +1159,7 @@ def forward(self, x): with torch.no_grad(): with FSDP.summon_full_params(model): self.assertEqual(model.my_parameter.item(), 3.1415926) - model.my_parameter.copy_(torch.full((1,), 1.75).cuda()) + model.my_parameter.copy_(torch.full((1,), 1.75).to(device_type)) self.assertEqual(model.my_parameter.item(), 1.75) model.load_state_dict(state_dict) with FSDP.summon_full_params(model): @@ -1161,7 +1167,7 @@ def forward(self, x): @skip_if_lt_x_gpu(2) def test_torch_save_load(self): - model = Model(wrap_fsdp=True).cuda() + model = Model(wrap_fsdp=True).to(device_type) with FSDP.state_dict_type(model, StateDictType.LOCAL_STATE_DICT): state_dict = model.state_dict() checkpoint = io.BytesIO() @@ -1192,7 +1198,7 @@ def test_torch_save_load(self): @skip_if_lt_x_gpu(2) def test_shared_module_and_shared_parameter(self): - model = FSDP(TestDummyModel().cuda()) + model = FSDP(TestDummyModel().to(device_type)) with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT): state_dict = model.state_dict() self.assertEqual( @@ -1226,7 +1232,8 @@ def test_sharded_load_multi_backend_pg(self): } for load_cpu in [True, False]: with self.subTest(load_cpu=load_cpu): - pg = dist.new_group(backend="cpu:gloo,cuda:nccl") + backend = torch.distributed.get_default_backend_for_device(device_type) + pg = dist.new_group(backend=f"cpu:gloo,{device_type}:{backend}") fsdp_model = TransformerWithSharedParams.init( pg, FSDPInitMode.RECURSIVE, @@ -1272,7 +1279,7 @@ def test_world_size_one(self): class TestFSDPStateDict4GPUs(FSDPTest): @property def world_size(self): - return torch.cuda.device_count() + return torch.accelerator.device_count() @skip_if_lt_x_gpu(4) def test_local_state_dict_reshard(self): @@ -1282,10 +1289,10 @@ def test_local_state_dict_reshard(self): local_state_dict, there are still some corner cases that using local_state_dict is a better solution. 
""" - model = FSDP(Model(wrap_fsdp=True)).cuda() + model = FSDP(Model(wrap_fsdp=True)).to(device_type) optim = torch.optim.SGD(model.parameters(), lr=0.1) - batch = torch.randn(4, 4, device=torch.cuda.current_device()) + batch = torch.randn(4, 4, device=torch.accelerator.current_device_index()) output = model(batch) loss = output.sum() loss.backward() @@ -1319,7 +1326,7 @@ def test_local_state_dict_reshard(self): if rank < 2: model2 = FSDP( Model(wrap_fsdp=True, process_group=new_pg), process_group=new_pg - ).cuda() + ).to(device_type) with FSDP.state_dict_type(model2, StateDictType.LOCAL_STATE_DICT): model2.load_state_dict(resharded_state_dict) diff --git a/test/distributed/fsdp/test_fsdp_tp_integration.py b/test/distributed/fsdp/test_fsdp_tp_integration.py index 2cc3858e12696..4577848337317 100644 --- a/test/distributed/fsdp/test_fsdp_tp_integration.py +++ b/test/distributed/fsdp/test_fsdp_tp_integration.py @@ -49,6 +49,8 @@ ) sys.exit(0) +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + class SimpleModel(torch.nn.Module): def __init__(self) -> None: @@ -119,7 +121,7 @@ def _get_sub_pgs(self, tensor_parallel_size: int): """ # 2-D mesh is [dp, tp] twod_mesh = DeviceMesh( - device_type="cuda", + device_type=device_type, mesh=torch.arange(0, self.world_size).view(-1, tensor_parallel_size), ) @@ -166,7 +168,7 @@ def _sync_tp_grads( self.rank // tp_world_size ] grad_device = flat_param.grad.device - grad = flat_param.grad.detach().clone().cuda(self.rank) + grad = flat_param.grad.detach().clone().to(self.rank) dist.all_reduce(grad, op=dist.ReduceOp.SUM, group=tp_pg) grad = grad.to(grad_device) flat_param.grad[~sharded_mask] = grad[~sharded_mask] @@ -199,7 +201,7 @@ def _get_grads_as_flattened( ] ) .contiguous() - .cuda(self.rank) + .to(self.rank) ) all_grads_as_flattened = torch.cat( [torch.empty_like(local_grads_as_flattened) for _ in range(fsdp_pg.size())] @@ -252,7 +254,7 @@ def _test_fsdp_tp_integration( tensor_parallel_size = 2 LR = 3e-5 torch.manual_seed(0) - model = SimpleModel().cuda(self.rank) + model = SimpleModel().to(self.rank) tp_fsdp_model = copy.deepcopy(model) sharded_param_names = SimpleModel.get_sharded_param_names() non_sharded_param_names = SimpleModel.get_non_sharded_param_names() @@ -268,10 +270,10 @@ def _test_fsdp_tp_integration( input_seed = self.rank torch.manual_seed(input_seed + 1) inp_size = [2, 3, 5] - inp = torch.rand(*inp_size).cuda(self.rank) + inp = torch.rand(*inp_size).to(self.rank) self.assertEqual(model(inp), tp_fsdp_model(inp)) # sanity check - mesh_1d = init_device_mesh("cuda", (self.world_size,)) + mesh_1d = init_device_mesh(device_type, (self.world_size,)) fsdp_model = FSDP( model, cpu_offload=cpu_offload, @@ -280,7 +282,7 @@ def _test_fsdp_tp_integration( use_orig_params=use_orig_params, ) mesh_2d = init_device_mesh( - "cuda", + device_type, (self.world_size // tensor_parallel_size, tensor_parallel_size), mesh_dim_names=["dp", "tp"], ) @@ -346,7 +348,7 @@ def _test_fsdp_tp_integration( fsdp_optim.step() tp_fsdp_optim.step() torch.manual_seed(input_seed + 16) - inp = torch.rand(*inp_size).cuda(self.rank) + inp = torch.rand(*inp_size).to(self.rank) fsdp_out = fsdp_model(inp) tp_fsdp_out = tp_fsdp_model(inp) self.assertEqual(fsdp_out, tp_fsdp_out) @@ -357,19 +359,19 @@ def test_fsdp_tp_extension_grad(self): Tests TP + FSDP extension with correct gradient (i.e. 
no ACT) """ mesh_2d = init_device_mesh( - "cuda", (self.world_size // 2, 2), mesh_dim_names=["dp", "tp"] + device_type, (self.world_size // 2, 2), mesh_dim_names=["dp", "tp"] ) class TestModel(torch.nn.Module): def __init__(self) -> None: super().__init__() - self.mlp = MLPModule("cuda") + self.mlp = MLPModule(device_type) self.mlp_norm = RMSNormPython(10) def forward(self, x): return self.mlp(self.mlp_norm(x)) - model = TestModel().cuda(self.rank) + model = TestModel().to(self.rank) # Shard with TP and test gradient tp_mesh = mesh_2d["tp"] @@ -387,7 +389,7 @@ def forward(self, x): comm_mode = CommDebugMode() with comm_mode: - fsdp_2d_model(torch.rand(2, 10).cuda(self.rank)).sum().backward() + fsdp_2d_model(torch.rand(2, 10).to(self.rank)).sum().backward() funcol = torch.ops.c10d_functional c10d_ops = torch.ops.c10d @@ -409,7 +411,7 @@ def forward(self, x): @skip_if_lt_x_gpu(4) def test_fsdp_tp_sync_module_state(self): mesh_2d = init_device_mesh( - "cuda", (self.world_size // 2, 2), mesh_dim_names=["dp", "tp"] + device_type, (self.world_size // 2, 2), mesh_dim_names=["dp", "tp"] ) tp_mesh = mesh_2d["tp"] dp_mesh = mesh_2d["dp"] diff --git a/test/distributed/fsdp/test_fsdp_traversal.py b/test/distributed/fsdp/test_fsdp_traversal.py index 6cd3188293df3..ad93d9a17829f 100644 --- a/test/distributed/fsdp/test_fsdp_traversal.py +++ b/test/distributed/fsdp/test_fsdp_traversal.py @@ -29,8 +29,8 @@ class TestTraversal(FSDPTest): @property def world_size(self): - if torch.cuda.is_available(): - gpu_cnt = torch.cuda.device_count() + if torch.torch.accelerator.is_available(): + gpu_cnt = torch.accelerator.device_count() if gpu_cnt < 2: return gpu_cnt return 2 @@ -62,6 +62,8 @@ def test_fsdp_modules(self): devices = ("cuda", "hpu", "xpu") -instantiate_device_type_tests(TestTraversal, globals(), only_for=devices) +instantiate_device_type_tests( + TestTraversal, globals(), only_for=devices, allow_xpu=True +) if __name__ == "__main__": run_tests() From 28e853103273335f1ca3e6258f2be13762941b04 Mon Sep 17 00:00:00 2001 From: orangeH25 <18085625039@163.com> Date: Fri, 12 Sep 2025 05:56:19 +0000 Subject: [PATCH 158/693] Add api info for torch._C._nn.pyi (#162361) Fix part of #148404 APis involved are as followed: - im2col - l1_loss - mish - mish_ - mse_loss Pull Request resolved: https://github.com/pytorch/pytorch/pull/162361 Approved by: https://github.com/ezyang --- tools/pyi/gen_pyi.py | 53 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tools/pyi/gen_pyi.py b/tools/pyi/gen_pyi.py index 0dc1e8de37d8c..ca17415762fd1 100644 --- a/tools/pyi/gen_pyi.py +++ b/tools/pyi/gen_pyi.py @@ -663,6 +663,59 @@ def gen_nn_functional(fm: FileManager) -> None: "Tensor", ) ], + "im2col": [ + defs( + "im2col", + [ + INPUT, + KERNEL_SIZE, + "dilation: _int | _size", + "padding: _int | _size", + "stride: _int | _size", + ], + "Tensor", + ) + ], + "l1_loss": [ + defs( + "l1_loss", + [ + INPUT, + "target: Tensor", + "reduction: str = ...", + ], + "Tensor", + ) + ], + "mish": [ + defs( + "mish", + [ + INPUT, + ], + "Tensor", + ) + ], + "mish_": [ + defs( + "mish_", + [ + INPUT, + ], + "Tensor", + ) + ], + "mse_loss": [ + defs( + "mse_loss", + [ + INPUT, + "target: Tensor", + "reduction: str = ...", + ], + "Tensor", + ) + ], } ) From 30e16d638953d20580b6db4715537d595890210e Mon Sep 17 00:00:00 2001 From: dolpm <34420038+dolpm@users.noreply.github.com> Date: Fri, 12 Sep 2025 05:56:19 +0000 Subject: [PATCH 159/693] [nativert] aoti (#162353) Summary: att Test Plan: ci Rollback Plan: 
Differential Revision: D81731425 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162353 Approved by: https://github.com/yiming0416 --- build_variables.bzl | 3 + docs/source/nativert.rst | 17 ++ docs/source/pytorch-api.md | 1 + test/cpp/nativert/CMakeLists.txt | 4 + ...test_aoti_model_container_registration.cpp | 16 ++ test/export/test_nativert.py | 157 ++++++++++++++++ torch/nativert/ModelRunner.cpp | 1 + torch/nativert/__init__.py | 0 torch/nativert/backends/__init__.py | 4 - .../{lower_utils.py => _lower_utils.py} | 2 +- ...aoti_module.py => _lowered_aoti_module.py} | 0 .../executor/AOTInductorDelegateExecutor.cpp | 168 ++++++++++++++++++ .../executor/AOTInductorDelegateExecutor.h | 49 +++++ .../AOTInductorModelContainerCudaShim.cpp | 24 +++ torch/nativert/executor/ETDelegateExecutor.h | 34 ++++ .../nativert/kernels/ETCallDelegateKernel.cpp | 43 +++++ torch/nativert/kernels/ETCallDelegateKernel.h | 22 +++ .../kernels/KernelHandlerRegistry.cpp | 41 +++++ 18 files changed, 581 insertions(+), 5 deletions(-) create mode 100644 docs/source/nativert.rst create mode 100644 test/cpp/nativert/test_aoti_model_container_registration.cpp create mode 100644 torch/nativert/__init__.py rename torch/nativert/backends/{lower_utils.py => _lower_utils.py} (98%) rename torch/nativert/backends/{lowered_aoti_module.py => _lowered_aoti_module.py} (100%) create mode 100644 torch/nativert/executor/AOTInductorDelegateExecutor.cpp create mode 100644 torch/nativert/executor/AOTInductorDelegateExecutor.h create mode 100644 torch/nativert/executor/AOTInductorModelContainerCudaShim.cpp create mode 100644 torch/nativert/executor/ETDelegateExecutor.h create mode 100644 torch/nativert/kernels/ETCallDelegateKernel.cpp create mode 100644 torch/nativert/kernels/ETCallDelegateKernel.h diff --git a/build_variables.bzl b/build_variables.bzl index 05b1cfdc7a4b0..01b204458eee0 100644 --- a/build_variables.bzl +++ b/build_variables.bzl @@ -638,10 +638,13 @@ libtorch_nativert_sources = [ "torch/nativert/kernels/KernelHandlerRegistry.cpp", "torch/nativert/kernels/TritonKernel.cpp", "torch/nativert/executor/triton/CpuTritonKernelManager.cpp", + "torch/nativert/executor/AOTInductorDelegateExecutor.cpp", + "torch/nativert/kernels/ETCallDelegateKernel.cpp", ] libtorch_nativert_cuda_sources = [ "torch/nativert/executor/triton/CudaTritonKernelManager.cpp", + "torch/nativert/executor/AOTInductorModelContainerCudaShim.cpp", ] torch_mobile_tracer_sources = [ diff --git a/docs/source/nativert.rst b/docs/source/nativert.rst new file mode 100644 index 0000000000000..3576573c9239e --- /dev/null +++ b/docs/source/nativert.rst @@ -0,0 +1,17 @@ +torch.nativert +============== + +.. automodule:: torch.nativert +.. currentmodule:: torch.nativert + +.. py:module:: torch.nativert + :noindex: + +torch.nativert.backends +----------------------- + +.. automodule:: torch.nativert.backends +.. currentmodule:: torch.nativert.backends + +.. 
py:module:: torch.nativert.backends + :noindex: diff --git a/docs/source/pytorch-api.md b/docs/source/pytorch-api.md index 6ebf94c47a357..2e858079d239f 100644 --- a/docs/source/pytorch-api.md +++ b/docs/source/pytorch-api.md @@ -56,6 +56,7 @@ torch.monitor torch.signal torch.special torch.overrides +torch.nativert torch.package profiler nn.init diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt index 524b7a82d960c..30c6828328249 100644 --- a/test/cpp/nativert/CMakeLists.txt +++ b/test/cpp/nativert/CMakeLists.txt @@ -43,10 +43,14 @@ set(NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/executor/triton/CpuTritonKernelManager.cpp ${TORCH_ROOT}/torch/nativert/kernels/TritonKernel.cpp ${TORCH_ROOT}/torch/nativert/executor/DelegateExecutor.cpp + ${TORCH_ROOT}/torch/nativert/executor/AOTInductorDelegateExecutor.cpp + ${TORCH_ROOT}/torch/nativert/kernels/ETCallDelegateKernel.cpp + ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/oss_proxy_executor.cpp ) if(USE_CUDA) list(APPEND NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/executor/triton/CudaTritonKernelManager.cpp) + list(APPEND NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/executor/AOTInductorModelContainerCudaShim.cpp) endif() add_executable(test_nativert diff --git a/test/cpp/nativert/test_aoti_model_container_registration.cpp b/test/cpp/nativert/test_aoti_model_container_registration.cpp new file mode 100644 index 0000000000000..94b19bf8e7563 --- /dev/null +++ b/test/cpp/nativert/test_aoti_model_container_registration.cpp @@ -0,0 +1,16 @@ +#include + +#include + +using namespace ::testing; +using namespace torch::nativert; + +TEST(AOTIModelContainerRegistrationTests, TestRegister) { + EXPECT_TRUE(AOTIModelContainerRunnerRegistry()->Has(at::kCPU)); + +#ifdef USE_CUDA + EXPECT_TRUE(AOTIModelContainerRunnerRegistry()->Has(at::kCUDA)); +#else + EXPECT_FALSE(AOTIModelContainerRunnerRegistry()->Has(at::kCUDA)); +#endif // USE_CUDA +} diff --git a/test/export/test_nativert.py b/test/export/test_nativert.py index bcbda2e42fc10..98824dee40674 100644 --- a/test/export/test_nativert.py +++ b/test/export/test_nativert.py @@ -6,9 +6,19 @@ import tempfile import unittest +from parameterized import parameterized + import torch +import torch._dynamo as torchdynamo from torch._C._nativert import PyModelRunner +from torch._dynamo.test_case import TestCase from torch._subclasses.fake_tensor import FakeTensor +from torch.nativert.backends._lower_utils import ( + lower_exported_program, + package_nativert_with_aoti_delegate, +) +from torch.testing._internal.common_utils import IS_WINDOWS +from torch.testing._internal.inductor_utils import HAS_GPU from torch.utils import _pytree as pytree @@ -185,6 +195,153 @@ def make_dynamic_cls(cls, strict=False): test_class.__module__ = __name__ +@unittest.skipIf(IS_WINDOWS, "Windows isn't supported for this case") +@unittest.skipIf(not torchdynamo.is_dynamo_supported(), "dynamo isn't support") +class TestNativeRT(TestCase): + @staticmethod + def get_module(): + class M(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.linear = torch.nn.Linear(4, 4) + self.relu = torch.nn.ReLU() + + def forward(self, x): + return self.relu(self.linear(x)) + + return M() + + @staticmethod + def get_module_multi_output(): + class MMultiOutput(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.linear = torch.nn.Linear(4, 4) + self.relu = torch.nn.ReLU() + + def forward(self, x): + return (self.relu(self.linear(x)), x) + + return MMultiOutput() + + @staticmethod + def 
get_model_pytree(): + class M(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.linear1 = torch.nn.Linear(4, 4) + self.linear2 = torch.nn.Linear(4, 4) + + def forward(self, x): + x1, (x2, x3) = x + y1 = self.linear1(x1) + y2 = self.linear2(x2) + y3 = self.linear2(x3) + return (y1, (y2, y3)) + + return M() + + parameters = [] + for device in ["cpu", "cuda"]: + if device == "cuda" and not HAS_GPU: + continue + for module, sample_inputs in [ + (get_module.__func__().to(device), (torch.randn(4, 4).to(device),)), + ( + get_module_multi_output.__func__().to(device), + (torch.randn(4, 4).to(device),), + ), + ( + get_model_pytree.__func__().to(device), + ( + ( + torch.randn(4, 4).to(device), + ( + torch.randn(4, 4).to(device), + torch.randn(4, 4).to(device), + ), + ), + ), + ), + ]: + parameters.append( + ( + device, + module, + sample_inputs, + ) + ) + + @parameterized.expand(parameters) + def test_aoti(self, device, m, sample_inputs): + MODEL_NAME = "model" + BACKEND_ID = "aoti" + + # get the original EP + original_ep = torch.export.export(m, sample_inputs) + + aoti_delegate_ep, aoti_files = lower_exported_program( + original_ep, MODEL_NAME, BACKEND_ID + ) + + # package everything needed for the NativeRT to execute the AOTI delegate + with tempfile.NamedTemporaryFile(suffix=".pt2", delete=False) as f: + package_nativert_with_aoti_delegate( + f, + MODEL_NAME, + BACKEND_ID, + original_ep, + aoti_delegate_ep, + aoti_files, + ) + filename = f.name + + try: + ep_args, ep_kwargs = aoti_delegate_ep.example_inputs + ep_args_copied, ep_kwargs_copied = ( + copy.deepcopy(ep_args), + copy.deepcopy(ep_kwargs), + ) + torch.manual_seed(0) + try: + flat_expected = pytree.tree_leaves( + aoti_delegate_ep.module()(*ep_args_copied, **ep_kwargs_copied) + ) + except Exception as e: + raise unittest.case.SkipTest(str(e)) from e + + model_runner = PyModelRunner(filename, f"{MODEL_NAME}-{BACKEND_ID}") + torch.manual_seed(0) + if _is_supported_types((ep_args, ep_kwargs)): + results = model_runner.run(*ep_args, **ep_kwargs) + else: + results = model_runner.run_with_flat_inputs_and_outputs( + *pytree.tree_leaves((ep_args, ep_kwargs)) + ) + flat_results = pytree.tree_leaves(results) + assert len(flat_results) == len(flat_expected) + for result, expected in zip(flat_results, flat_expected): + assert type(result) == type(expected) + if isinstance(result, torch.Tensor) and isinstance( + expected, torch.Tensor + ): + assert result.shape == expected.shape + assert result.dtype == expected.dtype + assert result.device == expected.device + torch.testing.assert_close(result, expected, equal_nan=True) + else: + assert result == expected + except RuntimeError as e: + # User need to register pytree type on the cpp side, which + # cannot be tested in python unittest. 
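                # [Editor's aside, not part of the patch.] End-to-end sketch of the
                # flow this test exercises, using the private helpers added in
                # torch/nativert/backends/_lower_utils.py below; the module and
                # inputs are illustrative stand-ins:
                #
                #   import tempfile
                #   import torch
                #   from torch._C._nativert import PyModelRunner
                #   from torch.nativert.backends._lower_utils import (
                #       lower_exported_program,
                #       package_nativert_with_aoti_delegate,
                #   )
                #
                #   m = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.ReLU())
                #   ep = torch.export.export(m, (torch.randn(4, 4),))
                #   aoti_ep, aoti_files = lower_exported_program(ep, "model", "aoti")
                #   with tempfile.NamedTemporaryFile(suffix=".pt2", delete=False) as f:
                #       package_nativert_with_aoti_delegate(f, "model", "aoti", ep, aoti_ep, aoti_files)
                #       path = f.name
                #   runner = PyModelRunner(path, "model-aoti")
                #   out = runner.run(torch.randn(4, 4))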
+ if "Unknown pytree node type" in str(e): + pass + else: + raise e + finally: + pathlib.Path(filename).unlink(missing_ok=True) + + tests = [ test_export.TestExport, ] diff --git a/torch/nativert/ModelRunner.cpp b/torch/nativert/ModelRunner.cpp index a7688860561e7..3baac49bfb220 100644 --- a/torch/nativert/ModelRunner.cpp +++ b/torch/nativert/ModelRunner.cpp @@ -85,6 +85,7 @@ ModelRunner::ModelRunner( weights->validateAllWeightsLoaded(); torch::nativert::ExecutorConfig config; + config.modelName = modelName; executor_ = std::make_unique( config, graph_, std::move(weights), pytorchStreamReader); diff --git a/torch/nativert/__init__.py b/torch/nativert/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/torch/nativert/backends/__init__.py b/torch/nativert/backends/__init__.py index 0981407836bde..e69de29bb2d1d 100644 --- a/torch/nativert/backends/__init__.py +++ b/torch/nativert/backends/__init__.py @@ -1,4 +0,0 @@ -from .lowered_aoti_module import LoweredBackendModule - - -__all__ = ["LoweredBackendModule"] diff --git a/torch/nativert/backends/lower_utils.py b/torch/nativert/backends/_lower_utils.py similarity index 98% rename from torch/nativert/backends/lower_utils.py rename to torch/nativert/backends/_lower_utils.py index 9bbd3aad9b701..fa97bc30b4a04 100644 --- a/torch/nativert/backends/lower_utils.py +++ b/torch/nativert/backends/_lower_utils.py @@ -6,7 +6,7 @@ from torch.export.pt2_archive._package import AOTI_FILES, package_pt2 from torch.types import FileLike -from .lowered_aoti_module import LoweredBackendModule +from ._lowered_aoti_module import LoweredBackendModule def get_new_ep_with_flat_inputs_outputs(ep: ExportedProgram) -> ExportedProgram: diff --git a/torch/nativert/backends/lowered_aoti_module.py b/torch/nativert/backends/_lowered_aoti_module.py similarity index 100% rename from torch/nativert/backends/lowered_aoti_module.py rename to torch/nativert/backends/_lowered_aoti_module.py diff --git a/torch/nativert/executor/AOTInductorDelegateExecutor.cpp b/torch/nativert/executor/AOTInductorDelegateExecutor.cpp new file mode 100644 index 0000000000000..3d701f41c00e9 --- /dev/null +++ b/torch/nativert/executor/AOTInductorDelegateExecutor.cpp @@ -0,0 +1,168 @@ +#include + +#include +#include +#include + +#include +#include +#include + +namespace torch::nativert { + +#ifndef NATIVERT_MSVC_TEST +C10_DEFINE_TYPED_REGISTRY( + AOTIModelContainerRunnerRegistry, + c10::DeviceType, + torch::inductor::AOTIModelContainerRunner, + std::unique_ptr, + const std::string&, + size_t, + const std::string&, + const std::string&, + const bool) +#endif // NATIVERT_MSVC_TEST + +namespace { +template +std::optional parse_precision( + const std::optional& precision) { + if (precision) { + return static_cast(*precision); + } + return std::nullopt; +} + +c10::Device infer_target_device(const Node& node) { + std::vector devices; + + const auto& tensorValuesMeta = node.owningGraph()->tensorValuesMeta(); + for (const auto* output : node.outputs()) { + if (auto it = tensorValuesMeta.find(std::string{output->name()}); + it != tensorValuesMeta.end()) { + devices.emplace_back(it->second.device()); + } + } + + TORCH_CHECK(!devices.empty(), "AOTI node should have at least one output"); + for (const auto i : c10::irange(1, devices.size())) { + if (!torch::nativert::isSameDevice(devices[0], devices[i])) { + LOG(WARNING) << "Node " << node + << " has outputs on multiple devices: " << devices[0] + << " and " << devices[i]; + } + } + + return devices[0]; +} + +std::unique_ptr 
+create_aoti_model_container_runner_cpu( + const std::string& model_so_path, + size_t num_models, + const std::string& device_str, + const std::string& cubin_dir, + const bool run_single_threaded) { + return std::make_unique( + model_so_path, + num_models, + /* run_single_threaded= */ run_single_threaded); +} + +} // namespace + +C10_REGISTER_TYPED_CREATOR( + AOTIModelContainerRunnerRegistry, + at::kCPU, + create_aoti_model_container_runner_cpu) + +AOTIDelegateExecutor::AOTIDelegateExecutor( + const Node& node, + const std::shared_ptr& weights, + const ExecutorConfig& executorConfig, + caffe2::serialize::PyTorchStreamReader* packageReader, + const MakeProxyExecutorFn& makeProxyExecutorFunc) + : ETDelegateExecutor(torch::_export::archive_spec::AOTINDUCTOR_DIR, node) { + TORCH_CHECK( + packageReader, "Package reader cannot be null for lowered modules"); + + auto path = get_delegate_dir() + "/"; + + LOG(INFO) << "Loading aotinductor model from archive path: " << path; + + std::optional model_name = std::nullopt; + for (const auto& record : packageReader->getAllRecords()) { + if (c10::starts_with(record, path) && c10::ends_with(record, ".so")) { + model_name = record.substr(record.find_last_of("/\\") + 1); + break; + } + } + + TORCH_CHECK(model_name.has_value(), "missing model .so in archive: ", path); + path.pop_back(); // remove trailing slash + + std::string tmp_dir = extractToTemporaryFolder(*packageReader, path); + LOG(INFO) << "Extracted aot_inductor model to: " << tmp_dir; + + std::string model_path = tmp_dir + "/" + *model_name; + + LOG(INFO) << "Loading aotinductor model from model path: " << model_path; + + auto device = infer_target_device(node); + LOG(INFO) << "Creating AOTI model container runner with device " + << device.str(); + + aoti_model_container_runner_ = AOTIModelContainerRunnerRegistry()->Create( + device.type(), + model_path, + /* num_models= */ executorConfig.maxNumConcurrentThreads, + device.str(), + /*cubin_dir=*/tmp_dir, + /*run_single_threaded=*/false); + + for (const auto& [name, original_fqn] : + aoti_model_container_runner_->getConstantNamesToOriginalFQNs()) { + if (weights->contains(original_fqn)) { + weight_names_map_[original_fqn] = name; + } else { + LOG(WARNING) + << "AOTI's Constant " << original_fqn + << " is not found in weights, it's likely a constant created by AOTI constant folding. " + << "Valid weight FQNs are " << weights->toString(); + } + } + + // AOTI's DelegateExecutor doesn't need to call processWeights or + // commitWeights here because it's invoked from Executor's ctor already. +} + +void AOTIDelegateExecutor::initWeights(std::shared_ptr weights) { + // Do nothing for AOTI, as AOTI's .so already contains the weights. 
+ LOG(INFO) + << "Skipping initWeights for AOTI to use original weights from .so file."; +} + +void AOTIDelegateExecutor::processWeights(std::shared_ptr weights) { + LOG(INFO) << "AOTIDelegateExecutor processing weights"; + std::unordered_map new_weights; + for (const auto& [original_fqn, name] : weight_names_map_) { + new_weights.emplace(name, &weights->at(original_fqn)); + } + + aoti_model_container_runner_->update_inactive_constant_buffer(new_weights); + aoti_model_container_runner_->run_const_fold(/*use_inactive=*/true); +} + +void AOTIDelegateExecutor::commitWeights() { + LOG(INFO) << "AOTIDelegateExecutor committing weights"; + aoti_model_container_runner_->swap_constant_buffer(); +} + +std::vector AOTIDelegateExecutor::run( + std::vector& inputs) { + RECORD_USER_SCOPE("sigmoid::AOTIDelegateExecutor::run"); + std::vector outputs = aoti_model_container_runner_->run(inputs); + return outputs; +} + +} // namespace torch::nativert diff --git a/torch/nativert/executor/AOTInductorDelegateExecutor.h b/torch/nativert/executor/AOTInductorDelegateExecutor.h new file mode 100644 index 0000000000000..9c44ae85f34e6 --- /dev/null +++ b/torch/nativert/executor/AOTInductorDelegateExecutor.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include +#include + +#ifdef USE_CUDA +#include +#endif + +namespace torch::nativert { + +class AOTIDelegateExecutor : public ETDelegateExecutor { + public: + explicit AOTIDelegateExecutor( + const Node& node, + const std::shared_ptr& weights, + const ExecutorConfig& executorConfig, + caffe2::serialize::PyTorchStreamReader* packageReader, + const MakeProxyExecutorFn& makeProxyExecutorFunc); + ~AOTIDelegateExecutor() override = default; + + void processWeights(std::shared_ptr weights) override; + void initWeights(std::shared_ptr weights) override; + void commitWeights() override; + + std::vector run(std::vector& inputs) override; + + private: + std::unique_ptr + aoti_model_container_runner_; + + // key is weight's original fqn, value is weight's name in AOTI + std::unordered_map weight_names_map_; +}; + +C10_DECLARE_TYPED_REGISTRY( + AOTIModelContainerRunnerRegistry, + c10::DeviceType, + torch::inductor::AOTIModelContainerRunner, + std::unique_ptr, + const std::string&, + size_t, + const std::string&, + const std::string&, + const bool); + +} // namespace torch::nativert diff --git a/torch/nativert/executor/AOTInductorModelContainerCudaShim.cpp b/torch/nativert/executor/AOTInductorModelContainerCudaShim.cpp new file mode 100644 index 0000000000000..cf415571c8c26 --- /dev/null +++ b/torch/nativert/executor/AOTInductorModelContainerCudaShim.cpp @@ -0,0 +1,24 @@ +#include +#include + +namespace torch::nativert { + +namespace { +std::unique_ptr +create_aoti_model_container_runner_cuda( + const std::string& model_so_path, + size_t num_models, + const std::string& device_str, + const std::string& cubin_dir, + const bool run_single_threaded) { + return std::make_unique( + model_so_path, num_models, device_str, cubin_dir, run_single_threaded); +} +} // namespace + +C10_REGISTER_TYPED_CREATOR( + AOTIModelContainerRunnerRegistry, + at::kCUDA, + create_aoti_model_container_runner_cuda) + +} // namespace torch::nativert diff --git a/torch/nativert/executor/ETDelegateExecutor.h b/torch/nativert/executor/ETDelegateExecutor.h new file mode 100644 index 0000000000000..2a71cf242429f --- /dev/null +++ b/torch/nativert/executor/ETDelegateExecutor.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include + +namespace torch::nativert { + +class ETDelegateExecutor : public 
DelegateExecutor { + public: + explicit ETDelegateExecutor( + const std::string_view& dir_prefix, + const Node& node) + : delegate_dir_([&]() { + const std::string* path = + std::get_if(&node.attributes()[0].value); + TORCH_CHECK( + path != nullptr, + "et hop's first attribute should correspond to it's path"); + return std::string(dir_prefix) + *path; + }()) { + VLOG(1) << "ETDelegateExecutor: " << delegate_dir_; + } + + ~ETDelegateExecutor() override = default; + + const std::string& get_delegate_dir() { + return delegate_dir_; + } + + private: + std::string delegate_dir_; +}; + +} // namespace torch::nativert diff --git a/torch/nativert/kernels/ETCallDelegateKernel.cpp b/torch/nativert/kernels/ETCallDelegateKernel.cpp new file mode 100644 index 0000000000000..3b43c3e8deb54 --- /dev/null +++ b/torch/nativert/kernels/ETCallDelegateKernel.cpp @@ -0,0 +1,43 @@ +#include + +#include + +namespace torch::nativert { + +ETCallDelegateKernel::ETCallDelegateKernel( + const Node* node, + ETDelegateExecutor& delegateExecutor) + : OpKernel(node), delegateExecutor_(delegateExecutor) { + for (const auto& input : node_->inputs()) { + TORCH_CHECK(input.value->type() == Type::Kind::Tensor); + } + + for (const auto* output : node_->outputs()) { + TORCH_CHECK(output->type() == Type::Kind::Tensor); + } +} + +void ETCallDelegateKernel::computeInternal( + ExecutionFrame& executionFrame) const { + std::vector inputs; + inputs.reserve(numInputs()); + + for (const auto& input : node_->inputs()) { + inputs.emplace_back(executionFrame.getTensor(input.value->id())); + } + + auto outputs = delegateExecutor_.run(inputs); + const auto& node_outputs = node_->outputs(); + TORCH_CHECK(outputs.size() == node_outputs.size()); + + size_t i = 0; + for (auto begin = std::make_move_iterator(outputs.begin()), + end = std::make_move_iterator(outputs.end()); + begin != end; + ++begin) { + executionFrame.setIValue(node_outputs[i]->id(), *begin); + i++; + } +} + +} // namespace torch::nativert diff --git a/torch/nativert/kernels/ETCallDelegateKernel.h b/torch/nativert/kernels/ETCallDelegateKernel.h new file mode 100644 index 0000000000000..c7eefc3b4eef1 --- /dev/null +++ b/torch/nativert/kernels/ETCallDelegateKernel.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace torch::nativert { + +class ETDelegateExecutor; + +class ETCallDelegateKernel : public OpKernel { + public: + explicit ETCallDelegateKernel( + const Node* node, + ETDelegateExecutor& delegateExecutor); + + void computeInternal(ExecutionFrame& executionFrame) const override final; + + private: + ETDelegateExecutor& delegateExecutor_; +}; + +} // namespace torch::nativert diff --git a/torch/nativert/kernels/KernelHandlerRegistry.cpp b/torch/nativert/kernels/KernelHandlerRegistry.cpp index 653ca5dfcb816..3ac176a81bc3a 100644 --- a/torch/nativert/kernels/KernelHandlerRegistry.cpp +++ b/torch/nativert/kernels/KernelHandlerRegistry.cpp @@ -12,6 +12,10 @@ #include #include +#include +#include +#include + namespace torch::nativert { namespace { @@ -31,6 +35,14 @@ std::string maybeRevisedStaticDispatchTarget(const Node& node) { } return std::string(node.target()); } + +std::unique_ptr make_proxy_executor( + const std::string& filename, + bool is_cpu, + std::optional> custom_objs) { + return std::make_unique( + filename, is_cpu, std::move(custom_objs)); +} } // namespace void register_kernel_handlers() { @@ -62,6 +74,35 @@ void register_kernel_handlers() { ->Create(maybeRevisedStaticDispatchTarget(node), &node), nullptr}; })); + KernelFactory::registerHandler( + 
"et_delegate", + KernelFactoryHandler( + [](const Node& node, + const torch::nativert::ExecutorConfig& /* executorConfig */) { + return c10::starts_with( + node.target(), + "torch.ops.higher_order.executorch_call_delegate"); + }, + [](const Node& node, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::shared_ptr weights, + const torch::nativert::ExecutorConfig& executorConfig, + caffe2::serialize::PyTorchStreamReader* packageReader) + -> std::pair< + KernelFactoryHandler::OpKernelPtr, + KernelFactoryHandler::DelegateExecutorPtr> { + auto delegateExecutor = std::make_unique( + node, + weights, + executorConfig, + packageReader, + make_proxy_executor); + + return { + std::make_unique( + &node, *delegateExecutor), + std::move(delegateExecutor)}; + })); }); } From f7e83219619a05934a344ca699c33ee69d5a3642 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Fri, 12 Sep 2025 06:12:43 +0000 Subject: [PATCH 160/693] fix cpp extension distributed warning spew (#162764) With the new change we only log the warning if we're running non distributed code or if we're in rank 0. Unit testing that certain messages get printed on certain ranks only feels kinda jank so test plan is below instead Test plan ```python # torchrun --nproc_per_node=2 demo_fix.py import os import logging logging.getLogger('torch.utils.cpp_extension').setLevel(logging.DEBUG) import torch if 'RANK' in os.environ: torch.distributed.init_process_group('nccl') from torch.utils.cpp_extension import _get_cuda_arch_flags _get_cuda_arch_flags() print(f"Rank {os.environ.get('RANK', '0')} done") ``` Logs showing how how `TORCH_CUDA_ARCH_LIST`only shows up once if we explicitly set the the logging level to `logging.DEBUG`. It also improves the debug message to explain what the actual behavior will be ``` (source) [marksaroufim@devgpu005]~% torchrun --nproc_per_node=2 demo_fix.py W0911 18:30:16.594000 1315439 /home/marksaroufim/pytorch/torch/distributed/run.py:814] W0911 18:30:16.594000 1315439 /home/marksaroufim/pytorch/torch/distributed/run.py:814] ***************************************** W0911 18:30:16.594000 1315439 /home/marksaroufim/pytorch/torch/distributed/run.py:814] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. W0911 18:30:16.594000 1315439 /home/marksaroufim/pytorch/torch/distributed/run.py:814] ***************************************** [rank0]:V0911 18:30:18.921000 1316753 pytorch/torch/utils/cpp_extension.py:2444] TORCH_CUDA_ARCH_LIST is not set, using TORCH_CUDA_ARCH_LIST='10.0+PTX' for visible GPU architectures. Set os.environ['TORCH_CUDA_ARCH_LIST'] to override. Rank 0 done Rank 1 done ``` But if we just use the default and comment out `logging.getLogger('torch.utils.cpp_extension').setLevel(logging.DEBUG)` Then we get ``` (source) [marksaroufim@devgpu005]~% torchrun --nproc_per_node=2 demo_fix.py W0911 18:14:33.926000 690759 /home/marksaroufim/pytorch/torch/distributed/run.py:814] W0911 18:14:33.926000 690759 /home/marksaroufim/pytorch/torch/distributed/run.py:814] ***************************************** W0911 18:14:33.926000 690759 /home/marksaroufim/pytorch/torch/distributed/run.py:814] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. 
W0911 18:14:33.926000 690759 /home/marksaroufim/pytorch/torch/distributed/run.py:814] ***************************************** Rank 0 done Rank 1 done (source) [marksaroufim@devgpu005]~% ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162764 Approved by: https://github.com/ezyang, https://github.com/zou3519 --- torch/utils/cpp_extension.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py index 7202a9638756d..902d2fe6ce0f5 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -2418,10 +2418,6 @@ def _get_cuda_arch_flags(cflags: Optional[list[str]] = None) -> list[str]: # If not given or set as native, determine what's best for the GPU / CUDA version that can be found if not _arch_list or _arch_list == "native": - if not _arch_list: - logger.warning( - "TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. \n" - "If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'] to specific architectures.") arch_list = [] # the assumption is that the extension should run on any of the currently visible cards, # which could be of different types - therefore all archs for visible cards should be included @@ -2440,6 +2436,15 @@ def _get_cuda_arch_flags(cflags: Optional[list[str]] = None) -> list[str]: arch_list.append(arch) arch_list = sorted(arch_list) arch_list[-1] += '+PTX' + + if not _arch_list: + # Only log on rank 0 in distributed settings to avoid spam + if not torch.distributed.is_available() or not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0: + arch_list_str = ';'.join(arch_list) + logger.debug( + "TORCH_CUDA_ARCH_LIST is not set, using TORCH_CUDA_ARCH_LIST='%s' " + "for visible GPU architectures. Set os.environ['TORCH_CUDA_ARCH_LIST'] to override.", + arch_list_str) else: # Deal with lists that are ' ' separated (only deal with ';' after) _arch_list = _arch_list.replace(' ', ';') From 333e546c02c51a65b2f6e0375cb0a67995ef638f Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Fri, 12 Sep 2025 06:38:28 +0000 Subject: [PATCH 161/693] [CUDAGraph][UX] warn many times for rerecording from dynamic shapes (#162696) Excessive re-recording CUDAGraphs lead to bad performance. We previously warns once if this happens. However, the limit (=50) is too high and users may just observe bad performance before actually seeing the warning message. Even worse, users may not see the warning message when there are many other logs. @anijain2305 reported that he never saw this warning message when using transformer library, but he DOES observe slowdown due to cudagraph re-recording & needs to turn off cudagraph. #162663 attempts to hard error when re-recording too many times due to dynamic shapes. But it is a bc-breaking change. Actually, hf-t5-generate model in torchbench failed due to 256 re-recordings. This PR a) reduces to smaller limit (=8); and b) makes the warning more spam, i.e., warn once for every distinct shapes once the limit is reached. 
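A minimal sketch of the warning policy described above, with hypothetical names — the actual logic lives in the cudagraph trees code and differs in detail; only the config value `cudagraph_dynamic_shape_warn_limit` and the warning text mirrored below come from this PR:

```python
import warnings

# Hypothetical illustration of "warn for every distinct shape once the limit
# is reached"; this is NOT the real torch/_inductor implementation.
CUDAGRAPH_DYNAMIC_SHAPE_WARN_LIMIT = 8  # lowered from 50 by this PR


class RecordingTracker:
    def __init__(self, limit=CUDAGRAPH_DYNAMIC_SHAPE_WARN_LIMIT):
        self.limit = limit
        self.seen_shapes = set()

    def on_new_recording(self, input_shape):
        # Each distinct input shape forces a new CUDAGraph recording.
        self.seen_shapes.add(input_shape)
        if self.limit is not None and len(self.seen_shapes) > self.limit:
            # Warn on every additional distinct shape past the limit, not just
            # once, so the message is hard to miss among other logs.
            warnings.warn(
                "CUDAGraph supports dynamic shapes by recording a new graph for each "
                f"distinct input size ({len(self.seen_shapes)} recorded so far). "
                "Recording too many CUDAGraphs may lead to poor performance; "
                "consider marking the varying dimension dynamic or disabling cudagraphs."
            )


tracker = RecordingTracker()
for batch_size in range(10, 200, 10):  # 19 distinct shapes, warns after the 8th
    tracker.on_new_recording((batch_size, 512))
```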
Fixes #162299 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162696 Approved by: https://github.com/mlazos --- test/inductor/test_cudagraph_trees.py | 2 -- torch/_inductor/config.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/test/inductor/test_cudagraph_trees.py b/test/inductor/test_cudagraph_trees.py index a4a3aa65c42c5..91e65cad82992 100644 --- a/test/inductor/test_cudagraph_trees.py +++ b/test/inductor/test_cudagraph_trees.py @@ -2684,8 +2684,6 @@ def iter(batch_size: int, mod: torch.nn.Module): for batch_size in range(10, 200, 10): iter(batch_size, mod) - print(captured_output) - FileCheck().check_count( "CUDAGraph supports dynamic shapes by recording a new graph for each " "distinct input size. Recording too many CUDAGraphs may lead to " diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index f6921a057ba0f..32c915795967b 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -1245,7 +1245,7 @@ class triton: # Warn loudly when the number of cudagraphs due to dynamic shape # exceeds this limit - cudagraph_dynamic_shape_warn_limit: Optional[int] = 50 + cudagraph_dynamic_shape_warn_limit: Optional[int] = 8 # synchronize after cudagraph invocation force_cudagraph_sync = False From 00e9ba75cde155b049e740012b86b02bdfca9aa4 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Fri, 12 Sep 2025 06:49:36 +0000 Subject: [PATCH 162/693] Revert "[indexing] Prevent integer overflow from large step values in C++ (#161707)" This reverts commit c140bf217f5ca5071ab9dbc1bcf9d4006242f44a. Reverted https://github.com/pytorch/pytorch/pull/161707 on behalf of https://github.com/huydhn due to Look like there is a land race as lots of jobs are failing after this lands ([comment](https://github.com/pytorch/pytorch/pull/161707#issuecomment-3283980465)) --- aten/src/ATen/native/TensorShape.cpp | 2 +- test/inductor/test_torchinductor.py | 25 ------------------------- torch/_decomp/decompositions.py | 3 +-- 3 files changed, 2 insertions(+), 28 deletions(-) diff --git a/aten/src/ATen/native/TensorShape.cpp b/aten/src/ATen/native/TensorShape.cpp index aea88dd1cc94b..c2d0856c3cd4c 100644 --- a/aten/src/ATen/native/TensorShape.cpp +++ b/aten/src/ATen/native/TensorShape.cpp @@ -3063,7 +3063,7 @@ Tensor slice( } auto storage_offset = self.storage_offset() + start_val * strides[dim]; auto len = end_val - start_val; - sizes[dim] = (len / step) + (len % step != 0); // safely round-up + sizes[dim] = (len + step - 1) / step; // round-up strides[dim] *= step; Tensor result; diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py index ac5b538189b38..6a6e3c674179b 100644 --- a/test/inductor/test_torchinductor.py +++ b/test/inductor/test_torchinductor.py @@ -4309,31 +4309,6 @@ def fn(x, y): self.assertEqual(torch.compile(fn)(x1, y), fn(x1, y)) self.assertEqual(torch.compile(fn)(x2, y), fn(x2, y)) - def test_slice_copy(self): - class Model(nn.Module): - def __init__(self, start=449, step=(2**63 - 1)): - super().__init__() - self.start = start - self.step = step - - def forward(self, x: torch.Tensor): - sliced = torch.slice_copy( - x, dim=0, start=self.start, end=None, step=self.step - ) - return torch.reciprocal(sliced) - - with config.patch({"implicit_fallbacks": True}): - # bad case - self.common( - Model(), - (torch.randn(875),), - ) - # normal case - self.common( - Model(step=10), - (torch.randn(875),), - ) - def test_slice1(self): def fn(a): return ( diff --git a/torch/_decomp/decompositions.py 
b/torch/_decomp/decompositions.py index 1918373b342e3..ba09c6173c5f3 100644 --- a/torch/_decomp/decompositions.py +++ b/torch/_decomp/decompositions.py @@ -759,8 +759,7 @@ def slice_forward( storage_offset = self.storage_offset() + start_val * strides[dim] len = end_val - start_val - # safely round-up for corresponding c++ impl - sizes[dim] = (len // step) + (1 if len % step != 0 else 0) + sizes[dim] = (len + step - 1) // step strides[dim] *= step if self.is_quantized: From 5f66902ecfb9cb4f7b9c50cb86307217cec1dbe9 Mon Sep 17 00:00:00 2001 From: jainapurva Date: Fri, 12 Sep 2025 06:51:12 +0000 Subject: [PATCH 163/693] Fix operator benchmark issue#162708 (#162744) This PR skips memory metric calculation for ops which don't take tensor input, fixing the operator_benchmark bug Fixes https://github.com/pytorch/pytorch/issues/162708 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162744 Approved by: https://github.com/huydhn --- benchmarks/operator_benchmark/benchmark_core.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/benchmarks/operator_benchmark/benchmark_core.py b/benchmarks/operator_benchmark/benchmark_core.py index 0b7fcf4e555f8..3caaf3e3a9167 100644 --- a/benchmarks/operator_benchmark/benchmark_core.py +++ b/benchmarks/operator_benchmark/benchmark_core.py @@ -373,9 +373,14 @@ def _measure_metrics(self, launch_test, test_case, iters, print_per_iter): curr_test_total_time = 0 time_trace = [] peak_memory = 0 - sample_input = next(iter(test_case.op_bench.inputs.values())) - device = sample_input.device - device_module = torch.get_device_module(device.type) + input_values = test_case.op_bench.inputs.values() + device, device_module = None, None + if input_values and isinstance(next(iter(input_values)), torch.Tensor): + # The device and device module information are crucial for memory metric calculation, + # In case of ops where inputs are integers (not tensor), memory metrics need not be calculated. + sample_input = next(iter(input_values)) + device = sample_input.device + device_module = torch.get_device_module(device.type) # TODO: add support for cpu memory measurement while True: if hasattr(device_module, "reset_peak_memory_stats"): From 6b59a19242e0862563bfe6b595f7db3ef44ade7f Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Fri, 12 Sep 2025 06:52:01 +0000 Subject: [PATCH 164/693] Revert "[RELAND] Always build USE_DISTRIBUTED (#160449) and Make distributed modules importable even when backend not built (#159889) (#162594)" This reverts commit 6e8f17c58029e5fa6bc222b2445ebbc0cbdc17c7. 
Reverted https://github.com/pytorch/pytorch/pull/162594 on behalf of https://github.com/huydhn due to Reverted internally ([comment](https://github.com/pytorch/pytorch/pull/162594#issuecomment-3283985880)) --- .ci/pytorch/macos-build.sh | 7 +- .ci/pytorch/macos-test.sh | 4 - .ci/wheel/build_wheel.sh | 3 +- BUILD.bazel | 3 +- CMakeLists.txt | 12 +- buckbuild.bzl | 4 +- c10/ovrsource_defs.bzl | 4 +- caffe2/CMakeLists.txt | 144 +++++----- cmake/Dependencies.cmake | 2 +- cmake/Summary.cmake | 12 +- docs/source/conf.py | 7 + test/cpp/dist_autograd/CMakeLists.txt | 2 +- test/distributed/tensor/test_fake.py | 41 --- test/export/test_export.py | 10 +- test/test_numa_binding.py | 5 +- tools/build_pytorch_libs.py | 3 +- torch/CMakeLists.txt | 50 ++-- torch/_C/_distributed_c10d.pyi | 9 - torch/csrc/Exceptions.h | 2 + torch/csrc/Module.cpp | 10 + torch/csrc/autograd/functions/init.cpp | 4 + torch/csrc/distributed/c10d/HashStore.cpp | 1 + torch/csrc/distributed/c10d/Work.cpp | 2 +- torch/csrc/distributed/c10d/init.cpp | 1 - torch/csrc/inductor/aoti_torch/shim_cpu.cpp | 4 + torch/csrc/jit/python/pybind_utils.h | 6 +- .../csrc/jit/python/python_sugared_value.cpp | 3 +- torch/csrc/jit/runtime/interpreter.h | 14 +- torch/csrc/jit/serialization/pickler.h | 2 + torch/csrc/jit/serialization/unpickler.h | 2 + .../standalone/execution_trace_observer.cpp | 9 + torch/csrc/profiler/util.cpp | 6 +- torch/csrc/profiler/util.h | 2 + torch/distributed/_C_stubs.py | 150 ---------- torch/distributed/__init__.py | 258 +++++++++--------- torch/distributed/_dist2.py | 2 +- torch/distributed/_distributed_c10d.py | 245 ----------------- torch/distributed/_functional_collectives.py | 12 +- .../_shard/sharded_tensor/reshard.py | 2 +- .../chunk_sharding_spec_ops/embedding_bag.py | 2 +- .../distributed/_symmetric_memory/__init__.py | 22 +- .../_symmetric_memory/_nvshmem_triton.py | 2 +- torch/distributed/_tools/fake_collectives.py | 4 +- .../algorithms/model_averaging/utils.py | 4 + torch/distributed/constants.py | 15 +- torch/distributed/device_mesh.py | 44 ++- torch/distributed/distributed_c10d.py | 70 ++--- torch/distributed/elastic/control_plane.py | 2 +- torch/distributed/nn/functional.py | 4 + torch/distributed/rpc/__init__.py | 2 +- torch/distributed/tensor/_collective_utils.py | 4 +- .../testing/_internal/distributed/fake_pg.py | 2 +- 52 files changed, 458 insertions(+), 778 deletions(-) delete mode 100644 test/distributed/tensor/test_fake.py delete mode 100644 torch/distributed/_C_stubs.py delete mode 100644 torch/distributed/_distributed_c10d.py diff --git a/.ci/pytorch/macos-build.sh b/.ci/pytorch/macos-build.sh index d41c3c08e6288..d7447e7d48582 100755 --- a/.ci/pytorch/macos-build.sh +++ b/.ci/pytorch/macos-build.sh @@ -35,10 +35,11 @@ fi print_cmake_info if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then - USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel + # Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls + USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel else - # NB: we always build with distributed; USE_DISTRIBUTED turns off all - # backends (specifically the gloo backend), so test that this case works too + # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests + # that building with USE_DISTRIBUTED=0 works at all. 
See https://github.com/pytorch/pytorch/issues/86448 USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64 fi if which sccache > /dev/null; then diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index 79d47da431712..a859901191e03 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -13,13 +13,9 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available( fi popd -python -mpip install -r requirements.txt - # enable debug asserts in serialization export TORCH_SERIALIZATION_DEBUG=1 -python -mpip install --no-input -r requirements.txt - setup_test_python() { # The CircleCI worker hostname doesn't resolve to an address. # This environment variable makes ProcessGroupGloo default to diff --git a/.ci/wheel/build_wheel.sh b/.ci/wheel/build_wheel.sh index 98b50c0ceeafe..2d5f4d30b4c82 100755 --- a/.ci/wheel/build_wheel.sh +++ b/.ci/wheel/build_wheel.sh @@ -177,8 +177,7 @@ source ~/${desired_python}-build/bin/activate retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt" retry brew install libomp -# For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which -# is build as part of tensorpipe submodule +# For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule export USE_DISTRIBUTED=1 export USE_MKLDNN=OFF diff --git a/BUILD.bazel b/BUILD.bazel index 635f39eed2cee..d4202e7a2c1e4 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -22,6 +22,7 @@ COMMON_COPTS = [ "-DHAVE_SHM_UNLINK=1", "-D_FILE_OFFSET_BITS=64", "-DUSE_FBGEMM", + "-DUSE_DISTRIBUTED", "-DAT_PER_OPERATOR_HEADERS", "-DATEN_THREADING=NATIVE", "-DNO_CUDNN_DESTROY_HANDLE", @@ -810,7 +811,7 @@ cc_library( name = "torch_python", srcs = libtorch_python_core_sources + if_cuda(libtorch_python_cuda_sources) - + libtorch_python_distributed_sources + + if_cuda(libtorch_python_distributed_sources) + GENERATED_AUTOGRAD_PYTHON, hdrs = glob([ "torch/csrc/generic/*.cpp", diff --git a/CMakeLists.txt b/CMakeLists.txt index f3e4b28bcff98..efad5419aaffa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,9 +181,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le)") set(CPU_POWER ON) endif() -# For non-supported platforms, turn USE_DISTRIBUTED off by default. -# NB: USE_DISTRIBUTED simply disables the backend; distributed code -# still gets built +# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not +# tested and likely won't work without additional changes. 
if(NOT LINUX AND NOT WIN32) set(USE_DISTRIBUTED OFF @@ -263,11 +262,11 @@ option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF) option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF) option(USE_NATIVE_ARCH "Use -march=native" OFF) cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF) -option(USE_DISTRIBUTED "Enable default distributed backends" ON) +option(USE_DISTRIBUTED "Use distributed" ON) cmake_dependent_option(USE_NCCL "Use NCCL" ON "USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) cmake_dependent_option(USE_XCCL "Use XCCL" ON - "USE_DISTRIBUTED;USE_XPU;UNIX;NOT APPLE" OFF) + "USE_XPU;UNIX;NOT APPLE" OFF) cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF) cmake_dependent_option(USE_RCCL "Use RCCL" ON "USE_NCCL;NOT WIN32" OFF) cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF) @@ -432,10 +431,11 @@ if(WIN32) PATH_SUFFIXES lib NO_DEFAULT_PATH) if(NOT libuv_tmp_LIBRARY) + set(USE_DISTRIBUTED OFF) set(USE_GLOO OFF) message( WARNING - "Libuv is not installed in current conda env. Set USE_GLOO to OFF. " + "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv." ) else() diff --git a/buckbuild.bzl b/buckbuild.bzl index 3e3af13f9118a..e079d98395441 100644 --- a/buckbuild.bzl +++ b/buckbuild.bzl @@ -156,7 +156,7 @@ ROOT = "//" if IS_OSS else "//xplat/caffe2" # for targets in subfolders ROOT_PATH = "//" if IS_OSS else "//xplat/caffe2/" -C10 = "//c10:c10" if IS_OSS else ("//xplat/caffe2/c10:c10_ovrsource" if is_arvr_mode() else "//xplat/caffe2/c10:c10") +C10 = "//c10:c10" if IS_OSS else "//xplat/caffe2/c10:c10" # a dictionary maps third party library name to fbsource and oss target THIRD_PARTY_LIBS = { @@ -948,7 +948,6 @@ def define_buck_targets( [ ("torch/csrc/api/include", "torch/**/*.h"), ("", "torch/csrc/**/*.h"), - ("", "torch/csrc/**/*.hpp"), ("", "torch/nativert/**/*.h"), ("", "torch/headeronly/**/*.h"), ("", "torch/script.h"), @@ -2034,7 +2033,6 @@ def define_buck_targets( ("", "caffe2/utils/*.h"), ("", "caffe2/core/*.h"), ("", "torch/csrc/*.h"), - ("", "torch/csrc/*.hpp"), ("", "torch/csrc/api/include/torch/*.h"), ("", "torch/csrc/autograd/*.h"), ("", "torch/csrc/autograd/*/*.h"), diff --git a/c10/ovrsource_defs.bzl b/c10/ovrsource_defs.bzl index 532404f21bbaf..aafe5a4de8c42 100644 --- a/c10/ovrsource_defs.bzl +++ b/c10/ovrsource_defs.bzl @@ -18,9 +18,9 @@ cuda_supported_platforms = [ def define_c10_ovrsource(name, is_mobile): if is_mobile: - pp_flags = ["-DC10_MOBILE=1", "-DC10_USE_GLOG"] + pp_flags = ["-DC10_MOBILE=1"] else: - pp_flags = ["-DC10_USE_GLOG"] + pp_flags = [] oxx_static_library( name = name, diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index b5d47bb4b5dff..99d4b2cd5aa93 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -540,9 +540,11 @@ if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER) ${TORCH_SRC_DIR}/csrc/utils/byte_order.cpp ) - append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) - if(NOT WIN32) - append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) + if(USE_DISTRIBUTED) + append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) + if(NOT WIN32) + append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) + endif() endif() endif() @@ -571,30 +573,32 @@ if(USE_CUDA) list(APPEND Caffe2_GPU_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) endif() - append_filelist("libtorch_cuda_distributed_base_sources" 
Caffe2_GPU_SRCS) - if(NOT WIN32) - append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) - set_source_files_properties( - ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/utils.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp - PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" - ) - endif() + if(USE_DISTRIBUTED) + append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS) + if(NOT WIN32) + append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) + set_source_files_properties( + ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/utils.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp + PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" + ) + endif() - set(ASYNC_MM_FILE "${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/AsyncMM.cu") - # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9 - if(CMAKE_COMPILER_IS_GNUCXX) - set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable") - endif() - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*") - set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a") + set(ASYNC_MM_FILE "${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/AsyncMM.cu") + # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9 + if(CMAKE_COMPILER_IS_GNUCXX) + set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable") + endif() + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*") + set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a") + endif() endif() set_source_files_properties( ${TORCH_ROOT}/aten/src/ATen/cuda/detail/LazyNVRTC.cpp @@ -627,9 +631,11 @@ if(USE_ROCM) list(APPEND Caffe2_HIP_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) endif() - append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) - if(NOT WIN32) - append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) + if(USE_DISTRIBUTED) + append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) + if(NOT WIN32) + append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) + endif() endif() # caffe2_nvrtc's stubs to driver APIs are useful for HIP. 
# See NOTE [ ATen NVRTC Stub and HIP ] @@ -1350,10 +1356,12 @@ if(BUILD_TEST) add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit) add_subdirectory(${TORCH_ROOT}/test/cpp/nativert ${CMAKE_BINARY_DIR}/test_nativert) add_subdirectory(${TORCH_ROOT}/test/inductor ${CMAKE_BINARY_DIR}/test_inductor) - add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) - if(NOT WIN32) - add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) - add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) + if(USE_DISTRIBUTED) + add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) + if(NOT WIN32) + add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) + add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) + endif() endif() if(NOT NO_API) add_subdirectory(${TORCH_ROOT}/test/cpp/api ${CMAKE_BINARY_DIR}/test_api) @@ -1458,40 +1466,46 @@ if(BUILD_LITE_INTERPRETER) endif() endif() -if(USE_GLOO AND USE_C10D_GLOO) - target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) -endif() -if(USE_UCC AND USE_C10D_UCC) - target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) - if(USE_CUDA) - target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) + +# Pass USE_DISTRIBUTED to torch_cpu, as some codes in jit/pickler.cpp and +# jit/unpickler.cpp need to be compiled only when USE_DISTRIBUTED is set +if(USE_DISTRIBUTED) + target_compile_definitions(torch_cpu PUBLIC USE_DISTRIBUTED) + if(USE_GLOO AND USE_C10D_GLOO) + target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) endif() -endif() -if(USE_NCCL AND USE_C10D_NCCL) - if(USE_ROCM) - target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) - else() - target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) + if(USE_UCC AND USE_C10D_UCC) + target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) + if(USE_CUDA) + target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) + endif() + endif() + if(USE_NCCL AND USE_C10D_NCCL) + if(USE_ROCM) + target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) + else() + target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) + endif() + endif() + if(USE_MPI AND USE_C10D_MPI) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set_source_files_properties( + "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" + PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) + endif() + target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) + endif() + # Pass USE_RPC in order to reduce use of + # #if defined(USE_DISTRIBUTED) && !defined(_WIN32) + # need to be removed when RPC is supported + if(NOT WIN32) + target_compile_definitions(torch_cpu PUBLIC USE_RPC) + endif() + # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp + # can only be compiled with USE_TENSORPIPE is set. 
+ if(USE_TENSORPIPE) + target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) endif() -endif() -if(USE_MPI AND USE_C10D_MPI) - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set_source_files_properties( - "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" - PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) - endif() - target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) -endif() -# Pass USE_RPC in order to reduce use of -# #if defined(USE_DISTRIBUTED) && !defined(_WIN32) -# need to be removed when RPC is supported -if(NOT WIN32) - target_compile_definitions(torch_cpu PUBLIC USE_RPC) -endif() -# Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp -# can only be compiled with USE_TENSORPIPE is set. -if(USE_TENSORPIPE) - target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) endif() if(NOT INTERN_BUILD_MOBILE) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 08ffdaf8cf451..6ad56d3b9b44e 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1134,7 +1134,7 @@ if(USE_CUDA AND CUDA_VERSION VERSION_LESS 13.0) include_directories(SYSTEM ${CUB_INCLUDE_DIRS}) endif() -if(USE_TENSORPIPE) +if(USE_DISTRIBUTED AND USE_TENSORPIPE) if(MSVC) message(WARNING "Tensorpipe cannot be used on Windows.") else() diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index fb64e99bccf22..ffd4b5298a890 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -192,11 +192,13 @@ function(caffe2_print_configuration_summary) message(STATUS " USE_PYTORCH_QNNPACK : ${USE_PYTORCH_QNNPACK}") message(STATUS " USE_XNNPACK : ${USE_XNNPACK}") message(STATUS " USE_DISTRIBUTED : ${USE_DISTRIBUTED}") - message(STATUS " USE_MPI : ${USE_MPI}") - message(STATUS " USE_GLOO : ${USE_GLOO}") - message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}") - message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}") - message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}") + if(${USE_DISTRIBUTED}) + message(STATUS " USE_MPI : ${USE_MPI}") + message(STATUS " USE_GLOO : ${USE_GLOO}") + message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}") + message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}") + message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}") + endif() if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") endif() diff --git a/docs/source/conf.py b/docs/source/conf.py index d1504757f9c54..44ad4de8115f6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -3333,6 +3333,13 @@ def coverage_post_process(app, exception): if not isinstance(app.builder, CoverageBuilder): return + if not torch.distributed.is_available(): + raise RuntimeError( + "The coverage tool cannot run with a version " + "of PyTorch that was built with USE_DISTRIBUTED=0 " + "as this module's API changes." 
+ ) + # These are all the modules that have "automodule" in an rst file # These modules are the ones for which coverage is checked # Here, we make sure that no module is missing from that list diff --git a/test/cpp/dist_autograd/CMakeLists.txt b/test/cpp/dist_autograd/CMakeLists.txt index 86a6c924288bb..14fd7f7ae9a2b 100644 --- a/test/cpp/dist_autograd/CMakeLists.txt +++ b/test/cpp/dist_autograd/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT WIN32) +if(USE_DISTRIBUTED AND NOT WIN32) set(DIST_AUTOGRAD_TEST_DIR "${TORCH_ROOT}/test/cpp/dist_autograd") set(DIST_AUTOGRAD_TEST_SOURCES ${TORCH_ROOT}/test/cpp/common/main.cpp diff --git a/test/distributed/tensor/test_fake.py b/test/distributed/tensor/test_fake.py deleted file mode 100644 index 099c6e87f5f18..0000000000000 --- a/test/distributed/tensor/test_fake.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates -# Owner(s): ["oncall: distributed"] - -import torch -from torch._subclasses.fake_tensor import FakeTensorMode -from torch.distributed.tensor import DTensor -from torch.distributed.tensor.placement_types import Shard -from torch.testing._internal.common_utils import run_tests, TestCase -from torch.testing._internal.distributed.fake_pg import FakeStore - - -class TestFakeDTensor(TestCase): - def test_fake_dtensor_operations(self): - # Use FakeTensorMode to handle CUDA tensors without actual CUDA - fake_mode = FakeTensorMode() - world_size = 4 - - fake_store = FakeStore() - torch.distributed.init_process_group( - "fake", store=fake_store, rank=0, world_size=world_size - ) - device_mesh = torch.distributed.device_mesh.init_device_mesh( - "cuda", - (2, world_size // 2), - ) - - # Create fake CUDA tensor using FakeTensorMode - with fake_mode: - x = torch.randn(1, 1, device="cuda") - x = DTensor.from_local(x, device_mesh, [Shard(0), Shard(1)]) - - # Test basic DTensor operations - self.assertIsInstance(x, DTensor) - - # Test sum operation - r = x.sum(1) - self.assertIsInstance(r, DTensor) - - -if __name__ == "__main__": - run_tests() diff --git a/test/export/test_export.py b/test/export/test_export.py index 2c466f162a893..b4596eab95baf 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -60,7 +60,10 @@ from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.symbolic_shapes import ShapeEnv from torch.testing import FileCheck -from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FLASH_ATTENTION +from torch.testing._internal.common_cuda import ( + PLATFORM_SUPPORTS_FLASH_ATTENTION, + xfailIfDistributedNotSupported, +) from torch.testing._internal.common_utils import ( find_library_location, IS_FBCODE, @@ -15769,6 +15772,7 @@ def distributed_env(self, world_size): finally: torch.distributed.destroy_process_group() + @xfailIfDistributedNotSupported def test_distributed_all_reduce(self): class Foo(torch.nn.Module): def __init__(self): @@ -15786,6 +15790,7 @@ def forward(self, x): inp = (torch.randn(4, 4),) self.assertTrue(torch.allclose(ep.module()(*inp), m(*inp))) + @xfailIfDistributedNotSupported def test_distributed_all_gather(self): class Foo(torch.nn.Module): def forward(self, x): @@ -15801,6 +15806,7 @@ def forward(self, x): torch.allclose(a, b) for a, b in zip(ep.module()(*inp), m(*inp)) ) + @xfailIfDistributedNotSupported def test_distributed_all_gather_into_tensor(self): class Foo(torch.nn.Module): def forward(self, x): @@ -15814,6 +15820,7 @@ def forward(self, x): inp = (torch.randn(2),) self.assertTrue(torch.allclose(ep.module()(*inp), m(*inp))) + 
@xfailIfDistributedNotSupported @testing.expectedFailureCppRuntime def test_distributed_all_to_all_single(self): class Foo(torch.nn.Module): @@ -15831,6 +15838,7 @@ def forward(self, x): ) self.assertEqual(len(nodes), 1) + @xfailIfDistributedNotSupported @testing.expectedFailureCppRuntime def test_distributed_reduce_scatter_tensor(self): class Foo(torch.nn.Module): diff --git a/test/test_numa_binding.py b/test/test_numa_binding.py index d38032ba22603..764156ff9b98a 100644 --- a/test/test_numa_binding.py +++ b/test/test_numa_binding.py @@ -7,7 +7,7 @@ from dataclasses import dataclass from multiprocessing.context import SpawnProcess from typing import Any, Optional -from unittest import skipIf, skipUnless +from unittest import skipUnless from unittest.mock import mock_open, patch import torch @@ -22,7 +22,7 @@ AffinityMode, NumaOptions, ) -from torch.testing._internal.common_utils import IS_MACOS, run_tests, TestCase +from torch.testing._internal.common_utils import run_tests, TestCase @dataclass(frozen=True) @@ -680,7 +680,6 @@ def test_core_complex_tiebreak_prefers_lower_cache_key(self) -> None: set(range(0, 2)), ) - @skipIf(IS_MACOS, "sched_getaffinity doesn't exist") def test_binds_to_node_0_if_node_stored_as_minus_one(self) -> None: self._add_mock_hardware( num_sockets=1, diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py index 457b224354fb2..9d43de80f1298 100644 --- a/tools/build_pytorch_libs.py +++ b/tools/build_pytorch_libs.py @@ -88,7 +88,8 @@ def build_pytorch( ) -> None: my_env = _create_build_env() if ( - not check_negative_env_flag("USE_CUDA") + not check_negative_env_flag("USE_DISTRIBUTED") + and not check_negative_env_flag("USE_CUDA") and not check_negative_env_flag("USE_NCCL") and not check_env_flag("USE_SYSTEM_NCCL") ): diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index adc9aad4a05c3..866c40ad1c12e 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -276,30 +276,32 @@ add_custom_command( WORKING_DIRECTORY "${TORCH_ROOT}" ) +if(USE_DISTRIBUTED) + if(WIN32) + append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS) + else() + append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS) + endif() + # Disable certain warnings for GCC-9.X + if(CMAKE_COMPILER_IS_GNUCXX) + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + endif() + # NCCL is a private dependency of libtorch, but libtorch_python includes + # some private headers of libtorch, which in turn include NCCL. As a hacky + # alternative to making NCCL a public dependency of libtorch, we make it + # a private dependency of libtorch_python as well. + if(USE_NCCL) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl) + endif() + # Same for MPI. 
+ if(USE_MPI) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX) + endif() + list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D) -if(WIN32) - append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS) -else() - append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS) endif() -# Disable certain warnings for GCC-9.X -if(CMAKE_COMPILER_IS_GNUCXX) - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") -endif() -# NCCL is a private dependency of libtorch, but libtorch_python includes -# some private headers of libtorch, which in turn include NCCL. As a hacky -# alternative to making NCCL a public dependency of libtorch, we make it -# a private dependency of libtorch_python as well. -if(USE_NCCL) - list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl) -endif() -# Same for MPI. -if(USE_MPI) - list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX) -endif() -list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D) if(USE_NCCL AND NOT WIN32) list(APPEND TORCH_PYTHON_SRCS @@ -367,6 +369,10 @@ if(BUILD_LIBTORCHLESS) target_compile_definitions(torch_python PRIVATE USE_C10D_NCCL) endif() + if(USE_DISTRIBUTED) + target_compile_definitions(torch_python PRIVATE USE_DISTRIBUTED) + endif() + if(USE_MPI AND USE_C10D_MPI) target_compile_definitions(torch_python PRIVATE USE_C10D_MPI) endif() diff --git a/torch/_C/_distributed_c10d.pyi b/torch/_C/_distributed_c10d.pyi index 79e437063b8cb..ad3d8e3abf245 100644 --- a/torch/_C/_distributed_c10d.pyi +++ b/torch/_C/_distributed_c10d.pyi @@ -851,12 +851,3 @@ class ProcessGroupXCCL(Backend): def _set_process_group(pg: ProcessGroup) -> None: ... def _current_process_group() -> ProcessGroup: ... -def _dump_nccl_trace_json( - includeCollectives: Optional[bool] = ..., - onlyActive: Optional[bool] = ..., -) -> bytes: ... -def _dump_nccl_trace( - includeCollectives: Optional[bool] = ..., - includeStackTraces: Optional[bool] = ..., - onlyActive: Optional[bool] = ..., -) -> bytes: ... 
diff --git a/torch/csrc/Exceptions.h b/torch/csrc/Exceptions.h index d43d2b02a23ef..60a7bb644df01 100644 --- a/torch/csrc/Exceptions.h +++ b/torch/csrc/Exceptions.h @@ -15,7 +15,9 @@ #include #include +#if defined(USE_DISTRIBUTED) #include +#endif inline void PyErr_SetString(PyObject* type, const std::string& message) { PyErr_SetString(type, message.c_str()); diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index ac2b03d2651cc..675a4c4310052 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -120,10 +120,14 @@ #endif #endif +#ifdef USE_DISTRIBUTED +#ifdef USE_C10D #include #include #include #include +#endif +#endif #if defined(USE_VALGRIND) #include @@ -548,7 +552,11 @@ static PyObject* THPModule_getBackcompatKeepdimWarn( } static PyObject* THPModule_hasDistributed(PyObject* _unused, PyObject* noargs) { +#ifdef USE_DISTRIBUTED Py_RETURN_TRUE; +#else + Py_RETURN_FALSE; +#endif } static PyObject* THPModule_showConfig(PyObject* module, PyObject* noargs) { @@ -1985,6 +1993,7 @@ PyObject* initModule() { #ifdef USE_XPU THPUtils_addPyMethodDefs(methods, THXPModule_methods()); #endif +#if defined(USE_DISTRIBUTED) && defined(USE_C10D) THPUtils_addPyMethodDefs( methods, torch::distributed::c10d::python_functions()); #ifndef _WIN32 @@ -1994,6 +2003,7 @@ PyObject* initModule() { methods, torch::distributed::autograd::python_functions()); THPUtils_addPyMethodDefs( methods, torch::distributed::rpc::testing::python_functions()); +#endif #endif static struct PyModuleDef torchmodule = { diff --git a/torch/csrc/autograd/functions/init.cpp b/torch/csrc/autograd/functions/init.cpp index 05c8901e1f60d..5e19010f9ae3c 100644 --- a/torch/csrc/autograd/functions/init.cpp +++ b/torch/csrc/autograd/functions/init.cpp @@ -8,7 +8,9 @@ #include #include #include +#ifdef USE_DISTRIBUTED #include +#endif #include #include #include @@ -148,9 +150,11 @@ void THPAutograd_initFunctions() { static PyTypeObject CopyBackwardsClass; addClass(module, CopyBackwardsClass, "CopyBackwards"); +#ifdef USE_DISTRIBUTED static PyTypeObject SendRpcBackwardClass; addClass( module, SendRpcBackwardClass, "SendRpcBackward"); +#endif static PyTypeObject CopySlicesClass; addClass(module, CopySlicesClass, "CopySlices"); diff --git a/torch/csrc/distributed/c10d/HashStore.cpp b/torch/csrc/distributed/c10d/HashStore.cpp index 1055afc4847d0..15befd9ec34e2 100644 --- a/torch/csrc/distributed/c10d/HashStore.cpp +++ b/torch/csrc/distributed/c10d/HashStore.cpp @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/torch/csrc/distributed/c10d/Work.cpp b/torch/csrc/distributed/c10d/Work.cpp index 2c1ee42727d8a..cdec9185ce537 100644 --- a/torch/csrc/distributed/c10d/Work.cpp +++ b/torch/csrc/distributed/c10d/Work.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index 128fab6593b37..7e79fef8392f6 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -46,7 +46,6 @@ #include #include #include - #include #include diff --git a/torch/csrc/inductor/aoti_torch/shim_cpu.cpp b/torch/csrc/inductor/aoti_torch/shim_cpu.cpp index a610685fe9557..b1c864bf3fbba 100644 --- a/torch/csrc/inductor/aoti_torch/shim_cpu.cpp +++ b/torch/csrc/inductor/aoti_torch/shim_cpu.cpp @@ -1,5 +1,7 @@ +#ifdef USE_DISTRIBUTED #include +#endif #include #include @@ -531,6 +533,7 @@ AOTITorchError aoti_torch_cpu__weight_int4pack_mm_cpu_tensor( }); } +#ifdef USE_DISTRIBUTED AOTITorchError 
aoti_torch_cpu__c10d_functional_all_reduce_( AtenTensorHandle inp, const char* reduce_op, @@ -563,3 +566,4 @@ AOTITorchError aoti_torch_cpu__c10d_functional_wait_tensor( *ret0 = new_tensor_handle(std::move(tmp_result)); }); } +#endif diff --git a/torch/csrc/jit/python/pybind_utils.h b/torch/csrc/jit/python/pybind_utils.h index 2c0c1ea4b9cf2..5ae84e3e0c68b 100644 --- a/torch/csrc/jit/python/pybind_utils.h +++ b/torch/csrc/jit/python/pybind_utils.h @@ -13,8 +13,6 @@ #include #include #include -#include -#include #include #include #include @@ -26,6 +24,10 @@ #include #include #include +#ifdef USE_DISTRIBUTED +#include +#include +#endif #include #include diff --git a/torch/csrc/jit/python/python_sugared_value.cpp b/torch/csrc/jit/python/python_sugared_value.cpp index 808fe7d3605ba..8b16e089aa50e 100644 --- a/torch/csrc/jit/python/python_sugared_value.cpp +++ b/torch/csrc/jit/python/python_sugared_value.cpp @@ -1225,7 +1225,7 @@ std::shared_ptr toSugaredValue( } else if (obj.ptr() == py::module::import("torch").attr("_check").ptr()) { return std::make_shared(); #ifdef USE_RPC - // This is not defined on WINDOWS + // RPC module is only available when build flag "USE_DISTRIBUTED" is on. } else if ( isRpcAvailable && obj.ptr() == @@ -1238,6 +1238,7 @@ std::shared_ptr toSugaredValue( return SpecialFormValue::create(prim::rpc_sync); } else if ( isRpcAvailable && + // RPC module is only available when build flag "USE_DISTRIBUTED" is on. obj.ptr() == py::module::import("torch.distributed.rpc").attr("remote").ptr()) { return SpecialFormValue::create(prim::rpc_remote); diff --git a/torch/csrc/jit/runtime/interpreter.h b/torch/csrc/jit/runtime/interpreter.h index be582cfb7cdd8..6ae9f52a0cda2 100644 --- a/torch/csrc/jit/runtime/interpreter.h +++ b/torch/csrc/jit/runtime/interpreter.h @@ -128,8 +128,13 @@ struct InterpreterContinuation { std::optional tls_state = std::nullopt) : state(std::move(state_)), stack(std::move(stack_)), - tls_state_(std::move(tls_state)), - dist_autograd_context_id_(dist_autograd_context_id) {} + tls_state_(std::move(tls_state)) +#ifdef USE_DISTRIBUTED + , + dist_autograd_context_id_(dist_autograd_context_id) +#endif + { + } void operator()(); @@ -137,10 +142,9 @@ struct InterpreterContinuation { InterpreterState state; Stack stack; std::optional tls_state_ = std::nullopt; -#ifndef USE_RPC - [[maybe_unused]] -#endif +#ifdef USE_DISTRIBUTED int64_t dist_autograd_context_id_; +#endif }; // what is the tensors type, including state from the current execution context diff --git a/torch/csrc/jit/serialization/pickler.h b/torch/csrc/jit/serialization/pickler.h index e3379f4de65ac..526c840bc10e8 100644 --- a/torch/csrc/jit/serialization/pickler.h +++ b/torch/csrc/jit/serialization/pickler.h @@ -79,7 +79,9 @@ class TORCH_API Pickler { void pushTuple(const IValue& ivalue); void pushString(const std::string& string); void pushDevice(const IValue& ivalue); +#ifdef USE_DISTRIBUTED void pushRRef(const IValue& ivalue); +#endif // unmemoized version void pushStringImpl(const std::string& string); void pushStorageOfTensor(const at::Tensor& tensor); diff --git a/torch/csrc/jit/serialization/unpickler.h b/torch/csrc/jit/serialization/unpickler.h index 208cf554ad2bb..702a1d8816e7f 100644 --- a/torch/csrc/jit/serialization/unpickler.h +++ b/torch/csrc/jit/serialization/unpickler.h @@ -140,7 +140,9 @@ class TORCH_API Unpickler { void rebuildParameter(); void rebuildTensorFromTypeV2(); void rebuildSparseTensor(); +#ifdef USE_DISTRIBUTED void rebuildRRef(); +#endif PickleOpCode readInstruction(); 
PickleOpCode readOpCode() { return static_cast(read()); diff --git a/torch/csrc/profiler/standalone/execution_trace_observer.cpp b/torch/csrc/profiler/standalone/execution_trace_observer.cpp index e46c141cd3f4d..1c88e80d4021c 100644 --- a/torch/csrc/profiler/standalone/execution_trace_observer.cpp +++ b/torch/csrc/profiler/standalone/execution_trace_observer.cpp @@ -30,12 +30,15 @@ #include #include +#ifdef USE_DISTRIBUTED #include +#endif // USE_DISTRIBUTED using namespace at; // Collective property attributes // https://github.com/pytorch/pytorch/issues/124674 +#ifdef USE_DISTRIBUTED constexpr auto kETCommsName = "collective_name"; constexpr auto kETInMsgNelems = "in_msg_nelems"; constexpr auto kETOutMsgNelems = "out_msg_nelems"; @@ -46,6 +49,7 @@ constexpr auto kETGlobalRankStride = "global_rank_stride"; constexpr auto kETGroupSize = "pg_size"; constexpr auto kETProcessGroupName = "pg_name"; constexpr auto kETProcessGroupDesc = "pg_desc"; +#endif // USE_DISTRIBUTED namespace torch::profiler::impl { @@ -265,6 +269,7 @@ static std::ofstream openOutputFile(const std::string& name) { return stream; } +#ifdef USE_DISTRIBUTED static std::string getAttrJson( const std::string& name, const std::string& type, @@ -277,6 +282,7 @@ static std::string getAttrJson( type, value); } +#endif static void writeJsonNode( std::ofstream& out, @@ -654,6 +660,7 @@ static void handleKernelBackendInfo( inline std::string getCommsNodeAttrs(const RecordFunction& fn) { // NOLINT std::vector attrs; +#ifdef USE_DISTRIBUTED // We rely on paramcommsdebug object that is available in thread local info auto debugInfo = dynamic_cast( c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PARAM_COMMS_INFO)); @@ -697,6 +704,8 @@ inline std::string getCommsNodeAttrs(const RecordFunction& fn) { // NOLINT addAttr(kGroupSize, kETGroupSize, "uint64"); +#endif // USE_DISTRIBUTED + // XXX consider using as string stream? return attrs.empty() ? 
"" : fmt::format(", {}", fmt::join(attrs, ", ")); } diff --git a/torch/csrc/profiler/util.cpp b/torch/csrc/profiler/util.cpp index e97699a99fd1c..0b2979e6fb7ea 100644 --- a/torch/csrc/profiler/util.cpp +++ b/torch/csrc/profiler/util.cpp @@ -11,7 +11,9 @@ #ifdef USE_KINETO #include #endif +#ifdef USE_DISTRIBUTED #include +#endif // USE_DISTRIBUTED namespace torch::profiler::impl { @@ -453,7 +455,7 @@ std::unordered_map saveNcclMeta( // @lint-ignore CLANGTIDY const SaveNcclMetaConfig& config) { std::unordered_map map; -#if !defined(BUILD_LITE_INTERPRETER) && !defined(C10_MOBILE) +#ifdef USE_DISTRIBUTED auto debugInfo = dynamic_cast( c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PARAM_COMMS_INFO)); @@ -563,7 +565,7 @@ std::unordered_map saveNcclMeta( } } } -#endif // !defined(BUILD_LITE_INTERPRETER) && !defined(C10_MOBILE) +#endif // USE_DISTRIBUTED return map; } diff --git a/torch/csrc/profiler/util.h b/torch/csrc/profiler/util.h index dcb4b866a2de3..f2ae57fa0e591 100644 --- a/torch/csrc/profiler/util.h +++ b/torch/csrc/profiler/util.h @@ -185,6 +185,7 @@ struct HashCombine { } }; +#ifdef USE_DISTRIBUTED constexpr auto kCommsName = "Collective name"; constexpr auto kDtype = "dtype"; constexpr auto kInMsgNelems = "In msg nelems"; @@ -202,5 +203,6 @@ constexpr auto kP2pSrc = "Src Rank"; constexpr auto kP2pDst = "Dst Rank"; constexpr auto kInTensorsStart = "Input Tensors start"; constexpr auto kOutTensorsStart = "Output Tensors start"; +#endif // USE_DISTRIBUTED } // namespace torch::profiler::impl diff --git a/torch/distributed/_C_stubs.py b/torch/distributed/_C_stubs.py deleted file mode 100644 index b241006372b6a..0000000000000 --- a/torch/distributed/_C_stubs.py +++ /dev/null @@ -1,150 +0,0 @@ -# mypy: allow-untyped-defs -""" -Python stubs for backend-specific distributed components. - -Since _C._distributed_c10d always exists now, this module only provides -stubs for backend-specific functionality that may not be available in all builds -(e.g., NCCL, UCC, MPI, Gloo, etc.). 
-""" - -from __future__ import annotations - -from typing import Optional, TYPE_CHECKING - -from torch._C._distributed_c10d import Store - - -if TYPE_CHECKING: - from datetime import timedelta - -import torch - - -# Store classes -class HashStore(Store): - """Stub HashStore for builds without this functionality.""" - - def __init__(self, *args, **kwargs): - self._data = {} - - def set(self, key: str, value: str): - self._data[key] = value - - def get(self, key: str) -> bytes: - return self._data.get(key, "").encode() - - -# Backend-specific process group stubs -class ProcessGroupMPI: - """Stub ProcessGroupMPI for non-MPI builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class ProcessGroupNCCL: - """Stub ProcessGroupNCCL for non-NCCL builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class ProcessGroupGloo: - """Stub ProcessGroupGloo for non-Gloo builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class ProcessGroupUCC: - """Stub ProcessGroupUCC for non-UCC builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class ProcessGroupXCCL: - """Stub ProcessGroupXCCL for non-XCCL builds.""" - - def __init__(self, *args, **kwargs): - pass - - -class _ProcessGroupWrapper: - """Stub _ProcessGroupWrapper for non-Gloo builds.""" - - def __init__(self, process_group, *args, **kwargs): - self._process_group = process_group - - def __getattr__(self, name): - return getattr(self._process_group, name) - - -# NCCL-specific function stubs -_DEFAULT_PG_NCCL_TIMEOUT: Optional[timedelta] = None - - -def _hash_tensors(tensors): - """Stub function to hash tensors - returns dummy hash.""" - return 0 - - -def _dump_nccl_trace_json( - includeCollectives: Optional[bool] = None, onlyActive: Optional[bool] = None -) -> bytes: - """Stub function that returns empty JSON trace.""" - return b"{}" - - -def _dump_nccl_trace( - includeCollectives: Optional[bool] = None, - includeStackTraces: Optional[bool] = None, - onlyActive: Optional[bool] = None, -) -> bytes: - """Stub function that returns empty pickle trace.""" - return b"" - - -# NVSHMEM/SymmetricMemory stubs -def _is_nvshmem_available() -> bool: - """Stub function that returns False indicating NVSHMEM is not available.""" - return False - - -def _nvshmemx_cumodule_init(module: int) -> None: - """Stub function for NVSHMEM CU module initialization.""" - - -class _SymmetricMemory: - """Stub _SymmetricMemory class for builds without this functionality.""" - - def __init__(self, *args, **kwargs): - pass - - @classmethod - def empty_strided_p2p(cls, size, stride, dtype, device, group_name=None): - """Stub that returns a regular tensor.""" - return torch.empty(size, dtype=dtype, device=device) - - @classmethod - def rendezvous(cls, tensor, group_name=None): - """Stub that returns None.""" - return None - - @classmethod - def set_group_info(cls, *args, **kwargs): - """Stub that does nothing.""" - - @classmethod - def set_backend(cls, name): - """Stub that does nothing.""" - - @classmethod - def get_backend(cls, device): - """Stub that returns None.""" - return None - - @classmethod - def has_multicast_support(cls, device_type, device_index): - """Stub that returns False.""" - return False diff --git a/torch/distributed/__init__.py b/torch/distributed/__init__.py index 836b00c51c3a4..38e2fdbee803a 100644 --- a/torch/distributed/__init__.py +++ b/torch/distributed/__init__.py @@ -14,10 +14,16 @@ def is_available() -> bool: """ - Always returns ``True``. 
Note that even if distributed is available, - there may not necessarily be any usable backends. + Return ``True`` if the distributed package is available. + + Otherwise, + ``torch.distributed`` does not expose any other APIs. Currently, + ``torch.distributed`` is available on Linux, MacOS and Windows. Set + ``USE_DISTRIBUTED=1`` to enable it when building PyTorch from source. + Currently, the default value is ``USE_DISTRIBUTED=1`` for Linux and Windows, + ``USE_DISTRIBUTED=0`` for MacOS. """ - return True + return hasattr(torch._C, "_c10d_init") if is_available() and not torch._C._c10d_init(): @@ -30,124 +36,132 @@ def is_available() -> bool: DistStoreError = torch._C._DistStoreError QueueEmptyError = torch._C._DistQueueEmptyError -from torch.distributed._distributed_c10d import ( - _broadcast_coalesced, - _compute_bucket_assignment_by_size, - _ControlCollectives, - _DEFAULT_FIRST_BUCKET_BYTES, - _make_nccl_premul_sum, - _register_builtin_comm_hook, - _register_comm_hook, - _StoreCollectives, - _test_python_store, - _verify_params_across_processes, - Backend as _Backend, - BuiltinCommHookType, - DebugLevel, - FileStore, - get_debug_level, - GradBucket, - Logger, - PrefixStore, - ProcessGroup as ProcessGroup, - Reducer, - set_debug_level, - set_debug_level_from_env, - Store, - TCPStore, - Work as _Work, -) - - -class _DistributedPdb(pdb.Pdb): - """ - Supports using PDB from inside a multiprocessing child process. - - Usage: - _DistributedPdb().set_trace() - """ - - def interaction(self, *args, **kwargs): - _stdin = sys.stdin +if is_available(): + from torch._C._distributed_c10d import ( + _broadcast_coalesced, + _compute_bucket_assignment_by_size, + _ControlCollectives, + _DEFAULT_FIRST_BUCKET_BYTES, + _make_nccl_premul_sum, + _register_builtin_comm_hook, + _register_comm_hook, + _StoreCollectives, + _test_python_store, + _verify_params_across_processes, + Backend as _Backend, + BuiltinCommHookType, + DebugLevel, + FileStore, + get_debug_level, + GradBucket, + Logger, + PrefixStore, + ProcessGroup as ProcessGroup, + Reducer, + set_debug_level, + set_debug_level_from_env, + Store, + TCPStore, + Work as _Work, + ) + + class _DistributedPdb(pdb.Pdb): + """ + Supports using PDB from inside a multiprocessing child process. + + Usage: + _DistributedPdb().set_trace() + """ + + def interaction(self, *args, **kwargs): + _stdin = sys.stdin + try: + sys.stdin = open("/dev/stdin") + pdb.Pdb.interaction(self, *args, **kwargs) + finally: + sys.stdin = _stdin + + _breakpoint_cache: dict[int, typing.Any] = {} + + def breakpoint(rank: int = 0, skip: int = 0, timeout_s=3600): + """ + Set a breakpoint, but only on a single rank. All other ranks will wait for you to be + done with the breakpoint before continuing. + + Args: + rank (int): Which rank to break on. Default: ``0`` + skip (int): Skip the first ``skip`` calls to this breakpoint. Default: ``0``. + """ + if skip > 0: + key = hash(str(traceback.format_exc())) + counter = _breakpoint_cache.get(key, 0) + 1 + _breakpoint_cache[key] = counter + if counter <= skip: + log.warning("Skip the breakpoint, counter=%d", counter) + return + + # avoid having the default timeout (if short) interrupt your debug session + if timeout_s is not None: + for group in torch.distributed.distributed_c10d._pg_map: + torch.distributed.distributed_c10d._set_pg_timeout( + timedelta(seconds=timeout_s), group + ) + + if get_rank() == rank: + pdb = _DistributedPdb() + pdb.message( + "\n!!! 
ATTENTION !!!\n\n" + f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n" + ) + pdb.set_trace() + # If Meta/Python keys are in the TLS, we want to make sure that we ignore them + # and hit the (default) CPU/CUDA implementation of barrier. + meta_in_tls = torch._C._meta_in_tls_dispatch_include() + guard = torch._C._DisableTorchDispatch() # type: ignore[attr-defined] + torch._C._set_meta_in_tls_dispatch_include(False) try: - sys.stdin = open("/dev/stdin") - pdb.Pdb.interaction(self, *args, **kwargs) + barrier() finally: - sys.stdin = _stdin - - -_breakpoint_cache: dict[int, typing.Any] = {} - - -def breakpoint(rank: int = 0, skip: int = 0, timeout_s=3600): - """ - Set a breakpoint, but only on a single rank. All other ranks will wait for you to be - done with the breakpoint before continuing. - - Args: - rank (int): Which rank to break on. Default: ``0`` - skip (int): Skip the first ``skip`` calls to this breakpoint. Default: ``0``. - """ - if skip > 0: - key = hash(str(traceback.format_exc())) - counter = _breakpoint_cache.get(key, 0) + 1 - _breakpoint_cache[key] = counter - if counter <= skip: - log.warning("Skip the breakpoint, counter=%d", counter) - return - - # avoid having the default timeout (if short) interrupt your debug session - if timeout_s is not None: - for group in torch.distributed.distributed_c10d._pg_map: - torch.distributed.distributed_c10d._set_pg_timeout( - timedelta(seconds=timeout_s), group - ) - - if get_rank() == rank: - pdb = _DistributedPdb() - pdb.message( - "\n!!! ATTENTION !!!\n\n" - f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n" - ) - pdb.set_trace() - # If Meta/Python keys are in the TLS, we want to make sure that we ignore them - # and hit the (default) CPU/CUDA implementation of barrier. - meta_in_tls = torch._C._meta_in_tls_dispatch_include() - guard = torch._C._DisableTorchDispatch() # type: ignore[attr-defined] - torch._C._set_meta_in_tls_dispatch_include(False) - try: - barrier() - finally: - torch._C._set_meta_in_tls_dispatch_include(meta_in_tls) - del guard - - -if sys.platform != "win32": - from torch.distributed._distributed_c10d import HashStore - -from .device_mesh import DeviceMesh, init_device_mesh - -# Variables prefixed with underscore are not auto imported -# See the comment in `distributed_c10d.py` above `_backend` on why we expose -# this. -from .distributed_c10d import * # noqa: F403 -from .distributed_c10d import ( - _all_gather_base, - _coalescing_manager, - _CoalescingManager, - _create_process_group_wrapper, - _get_process_group_name, - _rank_not_in_group, - _reduce_scatter_base, - _time_estimator, - get_node_local_rank, -) -from .remote_device import _remote_device -from .rendezvous import ( - _create_store_from_options, - register_rendezvous_handler, - rendezvous, -) - - -set_debug_level_from_env() + torch._C._set_meta_in_tls_dispatch_include(meta_in_tls) + del guard + + if sys.platform != "win32": + from torch._C._distributed_c10d import HashStore + + from .device_mesh import DeviceMesh, init_device_mesh + + # Variables prefixed with underscore are not auto imported + # See the comment in `distributed_c10d.py` above `_backend` on why we expose + # this. 
+ from .distributed_c10d import * # noqa: F403 + from .distributed_c10d import ( + _all_gather_base, + _coalescing_manager, + _CoalescingManager, + _create_process_group_wrapper, + _get_process_group_name, + _rank_not_in_group, + _reduce_scatter_base, + _time_estimator, + get_node_local_rank, + ) + from .remote_device import _remote_device + from .rendezvous import ( + _create_store_from_options, + register_rendezvous_handler, + rendezvous, + ) + + set_debug_level_from_env() + +else: + # This stub is sufficient to get + # python test/test_public_bindings.py -k test_correct_module_names + # working even when USE_DISTRIBUTED=0. Feel free to add more + # stubs as necessary. + # We cannot define stubs directly because they confuse pyre + + class _ProcessGroupStub: + pass + + sys.modules["torch.distributed"].ProcessGroup = _ProcessGroupStub # type: ignore[attr-defined] diff --git a/torch/distributed/_dist2.py b/torch/distributed/_dist2.py index 1c27bf55d6834..ce5cb8d7e0cc3 100644 --- a/torch/distributed/_dist2.py +++ b/torch/distributed/_dist2.py @@ -10,7 +10,7 @@ from typing import Protocol, Union import torch -from torch.distributed._distributed_c10d import ( +from torch._C._distributed_c10d import ( _current_process_group, _set_process_group, ProcessGroup, diff --git a/torch/distributed/_distributed_c10d.py b/torch/distributed/_distributed_c10d.py deleted file mode 100644 index beb7830edc1da..0000000000000 --- a/torch/distributed/_distributed_c10d.py +++ /dev/null @@ -1,245 +0,0 @@ -# mypy: disable-error-code="assignment" -# noqa: F401 -""" -Centralized module for importing and re-exporting torch._C._distributed_c10d components. - -IMPORTANT PATTERN: -Never access torch._C._distributed_c10d directly in code. Always import from and use -torch.distributed._distributed_c10d which is guaranteed to have all functions available. 
- -Example: - # WRONG: torch._C._distributed_c10d._set_global_rank(rank) - # RIGHT: - from torch.distributed._distributed_c10d import _set_global_rank - _set_global_rank(rank) -""" - -from typing import TYPE_CHECKING - -# Import all core distributed components from the C extension -# NB: This list has to be spelled out because the _C module doesn't have __all__ -from torch._C._distributed_c10d import ( - _allow_inflight_collective_as_graph_input, - _broadcast_coalesced, - _compute_bucket_assignment_by_size, - _ControlCollectives, - _current_process_group, - _DEFAULT_FIRST_BUCKET_BYTES, - _DEFAULT_PG_TIMEOUT, - _DistributedBackendOptions, - _make_nccl_premul_sum, - _register_builtin_comm_hook, - _register_comm_hook, - _register_process_group, - _register_work, - _resolve_process_group, - _set_allow_inflight_collective_as_graph_input, - _set_global_rank, - _set_process_group, - _StoreCollectives, - _test_python_store, - _unregister_all_process_groups, - _unregister_process_group, - _verify_params_across_processes, - _WorkerServer, - AllgatherOptions, - AllreduceCoalescedOptions, - AllreduceOptions, - AllToAllOptions, - Backend, - BarrierOptions, - BroadcastOptions, - BuiltinCommHookType, - DebugLevel, - FakeProcessGroup, - FakeWork, - FileStore, - GatherOptions, - get_debug_level, - GradBucket, - Logger, - PrefixStore, - ProcessGroup, - ReduceOp, - ReduceOptions, - Reducer, - ReduceScatterOptions, - ScatterOptions, - set_debug_level, - set_debug_level_from_env, - Store, - TCPStore, - Work, -) - - -# Backend-specific components that may not be available -_MPI_AVAILABLE = False -_NCCL_AVAILABLE = False -_GLOO_AVAILABLE = False -_UCC_AVAILABLE = False -_XCCL_AVAILABLE = False - -# HashStore -try: - from torch._C._distributed_c10d import HashStore -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import HashStore - -# NVSHMEM/SymmetricMemory components - -# There are multiple backends for SymmetricMemory, as a result, -# _SymmetricMemory should not be imported together with NVSHMEM related modules. 
-try: - from torch._C._distributed_c10d import _SymmetricMemory -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import _SymmetricMemory - -try: - from torch._C._distributed_c10d import ( - _is_nvshmem_available, - _nvshmemx_cumodule_init, - ) -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ( - _is_nvshmem_available, - _nvshmemx_cumodule_init, - ) - -# MPI backend -try: - from torch._C._distributed_c10d import ProcessGroupMPI - - _MPI_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ProcessGroupMPI - -# NCCL backend -try: - from torch._C._distributed_c10d import ( - _DEFAULT_PG_NCCL_TIMEOUT, - _dump_nccl_trace, - _dump_nccl_trace_json, - _hash_tensors, - ProcessGroupNCCL, - ) - - _NCCL_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ( - _DEFAULT_PG_NCCL_TIMEOUT, - _dump_nccl_trace, - _dump_nccl_trace_json, - _hash_tensors, - ProcessGroupNCCL, - ) - -# Gloo backend -try: - from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo - - _GLOO_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import _ProcessGroupWrapper, ProcessGroupGloo - -# UCC backend -try: - from torch._C._distributed_c10d import ProcessGroupUCC - - _UCC_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ProcessGroupUCC - -# XCCL backend -try: - from torch._C._distributed_c10d import ProcessGroupXCCL - - _XCCL_AVAILABLE = True -except ImportError: - if not TYPE_CHECKING: - from torch.distributed._C_stubs import ProcessGroupXCCL - -# Provide backwards compatibility by making all symbols available at module level -__all__ = [ - # Basic components - "_broadcast_coalesced", - "_compute_bucket_assignment_by_size", - "_ControlCollectives", - "_DEFAULT_FIRST_BUCKET_BYTES", - "_DEFAULT_PG_TIMEOUT", - "_DEFAULT_PG_NCCL_TIMEOUT", - "_make_nccl_premul_sum", - "_register_builtin_comm_hook", - "_register_comm_hook", - "_StoreCollectives", - "_test_python_store", - "_verify_params_across_processes", - "_allow_inflight_collective_as_graph_input", - "_register_work", - "_set_allow_inflight_collective_as_graph_input", - "_is_nvshmem_available", - "_nvshmemx_cumodule_init", - "_SymmetricMemory", - "_hash_tensors", - "_set_global_rank", - "_dump_nccl_trace", - "_dump_nccl_trace_json", - "Backend", - "BuiltinCommHookType", - "DebugLevel", - "FakeProcessGroup", - "FileStore", - "get_debug_level", - "GradBucket", - "HashStore", - "Logger", - "PrefixStore", - "ProcessGroup", - "Reducer", - "ReduceOp", - "set_debug_level", - "set_debug_level_from_env", - "Store", - "TCPStore", - "Work", - "FakeWork", - # Additional distributed_c10d components - "_DistributedBackendOptions", - "_register_process_group", - "_resolve_process_group", - "_unregister_all_process_groups", - "_unregister_process_group", - "_current_process_group", - "_set_process_group", - "_WorkerServer", - "AllgatherOptions", - "AllreduceCoalescedOptions", - "AllreduceOptions", - "AllToAllOptions", - "BarrierOptions", - "BroadcastOptions", - "GatherOptions", - "ReduceOptions", - "ReduceScatterOptions", - "ScatterOptions", - # Process group implementations - "ProcessGroupMPI", - "ProcessGroupNCCL", - "ProcessGroupGloo", - "ProcessGroupUCC", - "ProcessGroupXCCL", - "_ProcessGroupWrapper", - # Availability flags - "_MPI_AVAILABLE", - "_NCCL_AVAILABLE", - "_GLOO_AVAILABLE", - "_UCC_AVAILABLE", - 
"_XCCL_AVAILABLE", -] diff --git a/torch/distributed/_functional_collectives.py b/torch/distributed/_functional_collectives.py index 95feb6cd79714..c893794fc3011 100644 --- a/torch/distributed/_functional_collectives.py +++ b/torch/distributed/_functional_collectives.py @@ -7,10 +7,6 @@ import torch import torch.distributed as dist import torch.distributed.distributed_c10d as c10d -from torch.distributed._distributed_c10d import ( - _allow_inflight_collective_as_graph_input, - _set_allow_inflight_collective_as_graph_input, -) from torch.distributed.device_mesh import DeviceMesh from torch.fx.experimental.proxy_tensor import get_proxy_mode @@ -862,13 +858,15 @@ def all_reduce_wait_compiled(y): will be registered in the work registry, and the wait_tensor() in compiled region called on the output tensor of the collective will wait on the correct work object. """ - previous = _allow_inflight_collective_as_graph_input() + previous = torch._C._distributed_c10d._allow_inflight_collective_as_graph_input() try: - _set_allow_inflight_collective_as_graph_input(value) + torch._C._distributed_c10d._set_allow_inflight_collective_as_graph_input(value) yield finally: - _set_allow_inflight_collective_as_graph_input(previous) + torch._C._distributed_c10d._set_allow_inflight_collective_as_graph_input( + previous + ) def _make_all_gather_out_tensor(input, group_size): diff --git a/torch/distributed/_shard/sharded_tensor/reshard.py b/torch/distributed/_shard/sharded_tensor/reshard.py index 2bc3d65e5c8cb..daef9c3586184 100644 --- a/torch/distributed/_shard/sharded_tensor/reshard.py +++ b/torch/distributed/_shard/sharded_tensor/reshard.py @@ -4,7 +4,7 @@ import torch import torch.distributed as dist import torch.distributed._shard.sharding_spec as shard_spec -from torch.distributed._distributed_c10d import ProcessGroup +from torch._C._distributed_c10d import ProcessGroup from torch.distributed._shard.metadata import ShardMetadata from torch.distributed._shard.sharding_spec._internals import ( get_chunked_dim_size, diff --git a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py index f02563619d2fa..61808d0adf62a 100644 --- a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py +++ b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py @@ -4,7 +4,7 @@ import torch import torch.distributed as dist -from torch.distributed._distributed_c10d import ReduceOp +from torch._C._distributed_c10d import ReduceOp from torch.distributed._shard.sharded_tensor import ShardedTensor from torch.distributed._shard.sharding_spec import ChunkShardingSpec from torch.distributed._shard.sharding_spec.api import custom_sharding_spec_op diff --git a/torch/distributed/_symmetric_memory/__init__.py b/torch/distributed/_symmetric_memory/__init__.py index 8154cd9809139..43c2959fdd8d1 100644 --- a/torch/distributed/_symmetric_memory/__init__.py +++ b/torch/distributed/_symmetric_memory/__init__.py @@ -15,12 +15,7 @@ import torch.distributed._functional_collectives as funcol import torch.distributed.distributed_c10d as c10d from torch._C._autograd import DeviceType -from torch.distributed._distributed_c10d import ( - _register_work, - _SymmetricMemory, - ProcessGroup, - Work as _Work, -) +from torch._C._distributed_c10d import _SymmetricMemory, Work as _Work _group_name_to_store: dict[str, c10d.Store] = {} @@ -1493,7 +1488,7 @@ def _low_contention_all_gather( src_buf = 
symm_mem.get_buffer(remote_rank, tensor.shape, tensor.dtype) chunks[remote_rank].copy_(src_buf) symm_mem.barrier() - _register_work(output, Work()) + torch._C._distributed_c10d._register_work(output, Work()) return output @@ -1541,7 +1536,7 @@ def _low_contention_reduce_scatter_with_symm_mem_input( ret = ret.mean(dim=0) else: raise ValueError(f"reduce_op ({reduce_op}) is not supported") - _register_work(ret, Work()) + torch._C._distributed_c10d._register_work(ret, Work()) return ret @@ -1576,7 +1571,7 @@ def _low_contention_reduce_scatter_with_workspace( ret = ret.mean(dim=0) else: raise ValueError(f"reduce_op ({reduce_op}) is not supported") - _register_work(ret, Work()) + torch._C._distributed_c10d._register_work(ret, Work()) return ret @@ -1654,6 +1649,7 @@ def _all_to_all_vdev_2d_offset_meta( if TYPE_CHECKING: + from torch._C._distributed_c10d import ProcessGroup from torch.types import _device, _dtype, _int @@ -1731,6 +1727,8 @@ def rendezvous( group (Union[str, :class:`torch.distributed.ProcessGroup`]): The group identifying the participating processes. This can be either a group name or a process group object. """ + from torch._C._distributed_c10d import ProcessGroup + if isinstance(group, str): group_name = group elif isinstance(group, ProcessGroup): @@ -1748,7 +1746,11 @@ def is_nvshmem_available() -> bool: Check if NVSHMEM is available in current build and on current system. """ - from torch.distributed._distributed_c10d import _is_nvshmem_available + try: + from torch._C._distributed_c10d import _is_nvshmem_available + except ImportError: + # Not all builds have NVSHMEM support. + return False # Check if NVSHMEM is available on current system. return _is_nvshmem_available() diff --git a/torch/distributed/_symmetric_memory/_nvshmem_triton.py b/torch/distributed/_symmetric_memory/_nvshmem_triton.py index 7b7828227d7d1..c543fdffc1c76 100644 --- a/torch/distributed/_symmetric_memory/_nvshmem_triton.py +++ b/torch/distributed/_symmetric_memory/_nvshmem_triton.py @@ -75,7 +75,7 @@ def enable_triton(lib_dir: Optional[str] = None) -> dict[str, str]: """ import triton - from torch.distributed._distributed_c10d import _nvshmemx_cumodule_init + from torch._C._distributed_c10d import _nvshmemx_cumodule_init if lib_dir is not None: lib_path = os.path.join(lib_dir, "libnvshmem_device.bc") diff --git a/torch/distributed/_tools/fake_collectives.py b/torch/distributed/_tools/fake_collectives.py index b89970ab33480..3b201b395334b 100644 --- a/torch/distributed/_tools/fake_collectives.py +++ b/torch/distributed/_tools/fake_collectives.py @@ -2,9 +2,7 @@ from typing import Any import torch - -# Import centralized distributed components -from torch.distributed._distributed_c10d import ( +from torch._C._distributed_c10d import ( _resolve_process_group, FakeWork, ProcessGroup, diff --git a/torch/distributed/algorithms/model_averaging/utils.py b/torch/distributed/algorithms/model_averaging/utils.py index 3e3243002a9c0..fa8cc184eddc5 100644 --- a/torch/distributed/algorithms/model_averaging/utils.py +++ b/torch/distributed/algorithms/model_averaging/utils.py @@ -5,6 +5,10 @@ import torch import torch.distributed as dist + +# The two imports below are not always available depending on the +# USE_DISTRIBUTED compile flag. Make sure they raise import error +# if we're trying to use them. 
from torch.distributed import group, ProcessGroup diff --git a/torch/distributed/constants.py b/torch/distributed/constants.py index bfa8785218645..c1e604bc86753 100644 --- a/torch/distributed/constants.py +++ b/torch/distributed/constants.py @@ -1,11 +1,7 @@ from datetime import timedelta from typing import Optional -# Import from centralized fallback module - no ImportError handling needed -from torch.distributed._distributed_c10d import ( - _DEFAULT_PG_NCCL_TIMEOUT, - _DEFAULT_PG_TIMEOUT, -) +from torch._C._distributed_c10d import _DEFAULT_PG_TIMEOUT __all__ = ["default_pg_timeout", "default_pg_nccl_timeout"] @@ -20,4 +16,11 @@ # Later, we could consider merging them back together at the c++ layer if we can align on a same value. # (only if TORCH_NCCL_BLOCKING_WAIT or TORCH_NCCL_ASYNC_ERROR_HANDLING is set to 1). -default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT +try: + from torch._C._distributed_c10d import _DEFAULT_PG_NCCL_TIMEOUT + + default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT +except ImportError: + # if C++ NCCL support is not compiled, we don't have access to the default nccl value. + # if anyone is actually trying to use nccl in this state, it should error. + default_pg_nccl_timeout = None diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py index 6ee9263db8cd4..3a9363090bf71 100644 --- a/torch/distributed/device_mesh.py +++ b/torch/distributed/device_mesh.py @@ -11,14 +11,35 @@ from typing import Optional, TYPE_CHECKING, Union import torch +from torch.distributed import is_available from torch.utils._typing_utils import not_none __all__ = ["init_device_mesh", "DeviceMesh"] -if True: # just to temporarily avoid reindentation - from torch.distributed._distributed_c10d import Backend as C10dBackend +if not is_available(): + import sys + + # We need to create the stubs when distributed is not available. + # Otherwise, we would fail the doc tests (```./.ci/pytorch/docs-test.sh```), + # since it would try to import ``torch.distributed.device_mesh`` or + # ``torch.distributed.init_device_mesh`` but cannot find them. + + class _DeviceMeshStub: + pass + + def _init_device_mesh_stub(): + pass + + sys.modules["torch.distributed.device_mesh"].DeviceMesh = _DeviceMeshStub # type: ignore[attr-defined] + sys.modules[ + "torch.distributed.device_mesh" + ].init_device_mesh = _init_device_mesh_stub # type: ignore[attr-defined] + + +else: + from torch._C._distributed_c10d import Backend as C10dBackend from torch.distributed.distributed_c10d import ( _get_default_group, _resolve_process_group, @@ -513,16 +534,15 @@ def _setup_world_group_and_device(self): # heuristic to set the current cuda/cuda-like device base on num of gpu devices available in each host # NOTE: This device selection would only work for homogeneous hardware. num_devices_per_host = device_handle.device_count() - if num_devices_per_host: - if ( - world_size > num_devices_per_host - and world_size % num_devices_per_host != 0 - ): - raise RuntimeError( - f"DeviceMesh only support homogeneous hardware, but found " - f"{world_size} ranks and {num_devices_per_host} {self.device_type} devices!" - ) - device_handle.set_device(get_rank() % num_devices_per_host) + if ( + world_size > num_devices_per_host + and world_size % num_devices_per_host != 0 + ): + raise RuntimeError( + f"DeviceMesh only support homogeneous hardware, but found " + f"{world_size} ranks and {num_devices_per_host} {self.device_type} devices!" 
+ ) + device_handle.set_device(get_rank() % num_devices_per_host) return _get_default_group() diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index c81d9c60eb1fe..29609404df09b 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -19,21 +19,13 @@ from typing_extensions import deprecated import torch -import torch.distributed._distributed_c10d as _c10d from torch._C import _DistStoreError as DistStoreError -from torch._utils_internal import set_pytorch_distributed_envs_from_justknobs -from torch.distributed._distributed_c10d import ( # Process group implementations; Availability flags +from torch._C._distributed_c10d import ( _DistributedBackendOptions, - _GLOO_AVAILABLE, - _MPI_AVAILABLE, - _NCCL_AVAILABLE, - _ProcessGroupWrapper, _register_process_group, _resolve_process_group, - _UCC_AVAILABLE, _unregister_all_process_groups, _unregister_process_group, - _XCCL_AVAILABLE, AllgatherOptions, AllreduceCoalescedOptions, AllreduceOptions, @@ -45,11 +37,6 @@ get_debug_level, PrefixStore, ProcessGroup, - ProcessGroupGloo, - ProcessGroupMPI, - ProcessGroupNCCL, - ProcessGroupUCC, - ProcessGroupXCCL, ReduceOp, ReduceOptions, ReduceScatterOptions, @@ -57,6 +44,7 @@ Store, Work, ) +from torch._utils_internal import set_pytorch_distributed_envs_from_justknobs from torch.monitor import _WaitCounter from torch.overrides import handle_torch_function, has_torch_function from torch.utils._typing_utils import not_none @@ -143,11 +131,17 @@ "split_group", ] +_MPI_AVAILABLE = True +_NCCL_AVAILABLE = True +_GLOO_AVAILABLE = True +_UCC_AVAILABLE = True +_XCCL_AVAILABLE = True + _pickler = pickle.Pickler _unpickler = pickle.Unpickler -# Change __module__ of all imported types from the distributed wrapper that are public +# Change __module__ of all imported types from torch._C._distributed_c10d that are public def _export_c_types() -> None: _public_types_to_change_module = [ AllreduceCoalescedOptions, @@ -173,26 +167,45 @@ def _export_c_types() -> None: _export_c_types() -# Add process groups to __all__ and set their module based on availability -if _MPI_AVAILABLE: +try: + from torch._C._distributed_c10d import ProcessGroupMPI + ProcessGroupMPI.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupMPI"] +except ImportError: + _MPI_AVAILABLE = False + +try: + from torch._C._distributed_c10d import ProcessGroupNCCL -if _NCCL_AVAILABLE: ProcessGroupNCCL.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupNCCL"] +except ImportError: + _NCCL_AVAILABLE = False + +try: + from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo -if _GLOO_AVAILABLE: ProcessGroupGloo.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupGloo"] +except ImportError: + _GLOO_AVAILABLE = False + +try: + from torch._C._distributed_c10d import ProcessGroupUCC -if _UCC_AVAILABLE: ProcessGroupUCC.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupUCC"] +except ImportError: + _UCC_AVAILABLE = False + +try: + from torch._C._distributed_c10d import ProcessGroupXCCL -if _XCCL_AVAILABLE: ProcessGroupXCCL.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupXCCL"] +except ImportError: + _XCCL_AVAILABLE = False logger = logging.getLogger(__name__) @@ -1312,8 +1325,7 @@ def _get_default_store() -> Store: def _update_default_pg(pg) -> None: _world.default_pg = pg rank = pg.rank() if pg is not None and pg != GroupMember.NON_GROUP_MEMBER 
else -1 - - _c10d._set_global_rank(rank) + torch._C._distributed_c10d._set_global_rank(rank) def get_backend_config(group: Optional[ProcessGroup] = None) -> str: @@ -1950,7 +1962,7 @@ def _new_process_group_helper( if device_id: pg.bound_device_id = device_id - backend_class: _c10d.Backend + backend_class: torch._C._distributed_c10d.Backend for device, backend_str in backend_config.get_device_backend_map().items(): # Use the group name as prefix in the default store, such that # a single store can be reused by multiple groups. @@ -3065,9 +3077,7 @@ def _object_to_tensor(obj, device, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) if backend == Backend.NCCL: - from torch.distributed._distributed_c10d import _hash_tensors - - hash = _hash_tensors([byte_tensor]) + hash = torch._C._distributed_c10d._hash_tensors([byte_tensor]) logger.warning( "_object_to_tensor size: %s hash value: %s", byte_tensor.numel(), @@ -3082,9 +3092,7 @@ def _tensor_to_object(tensor, tensor_size, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) if backend == Backend.NCCL: - from torch.distributed._distributed_c10d import _hash_tensors - - hash = _hash_tensors([tensor]) + hash = torch._C._distributed_c10d._hash_tensors([tensor]) logger.warning( "_tensor_to_object size: %s hash value: %s", tensor.numel(), hash ) @@ -4961,7 +4969,7 @@ def monitored_barrier( def _create_process_group_wrapper( - wrapped_pg: _c10d.Backend, + wrapped_pg: torch._C._distributed_c10d.Backend, store_prefix: str, store: Store, rank: int, diff --git a/torch/distributed/elastic/control_plane.py b/torch/distributed/elastic/control_plane.py index 63334a0ca3f62..817255edd23dc 100644 --- a/torch/distributed/elastic/control_plane.py +++ b/torch/distributed/elastic/control_plane.py @@ -14,7 +14,7 @@ @contextmanager def _worker_server(socket_path: str) -> Generator[None, None, None]: - from torch.distributed._distributed_c10d import _WorkerServer + from torch._C._distributed_c10d import _WorkerServer server = _WorkerServer(socket_path) try: diff --git a/torch/distributed/nn/functional.py b/torch/distributed/nn/functional.py index 2bdf3fe2bdffd..eeff877260bcc 100644 --- a/torch/distributed/nn/functional.py +++ b/torch/distributed/nn/functional.py @@ -2,6 +2,10 @@ import torch import torch.distributed as dist from torch.autograd import Function + +# The two imports below are not always available depending on the +# USE_DISTRIBUTED compile flag. Make sure they raise import error +# if we're trying to use them. 
from torch.distributed import group, ReduceOp diff --git a/torch/distributed/rpc/__init__.py b/torch/distributed/rpc/__init__.py index 27a945a92e44c..adf901d6b6e3e 100644 --- a/torch/distributed/rpc/__init__.py +++ b/torch/distributed/rpc/__init__.py @@ -37,6 +37,7 @@ def is_available() -> bool: import numbers import torch.distributed.autograd as dist_autograd + from torch._C._distributed_c10d import Store from torch._C._distributed_rpc import ( # noqa: F401 _cleanup_python_rpc_handler, _DEFAULT_INIT_METHOD, @@ -69,7 +70,6 @@ def is_available() -> bool: RpcBackendOptions, WorkerInfo, ) - from torch.distributed._distributed_c10d import Store if _is_tensorpipe_available: from torch._C._distributed_rpc import ( # noqa: F401 diff --git a/torch/distributed/tensor/_collective_utils.py b/torch/distributed/tensor/_collective_utils.py index f01836c59592b..4fce6fea538a6 100644 --- a/torch/distributed/tensor/_collective_utils.py +++ b/torch/distributed/tensor/_collective_utils.py @@ -8,10 +8,8 @@ import torch import torch.distributed._functional_collectives as funcol import torch.distributed.tensor._dtensor_spec as dtensor_spec +from torch._C._distributed_c10d import _resolve_process_group from torch._logging import warning_once - -# Import from centralized fallback module - no conditional imports needed -from torch.distributed._distributed_c10d import _resolve_process_group from torch.distributed.device_mesh import _mesh_resources, DeviceMesh from torch.distributed.distributed_c10d import ( _get_group_size_by_name, diff --git a/torch/testing/_internal/distributed/fake_pg.py b/torch/testing/_internal/distributed/fake_pg.py index 035a8bb7c586d..0a2814c246459 100644 --- a/torch/testing/_internal/distributed/fake_pg.py +++ b/torch/testing/_internal/distributed/fake_pg.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import torch.distributed as dist -from torch.distributed._distributed_c10d import FakeProcessGroup +from torch._C._distributed_c10d import FakeProcessGroup class FakeStore(dist.Store): From 75de5b65b4e31aedf01317e576a985cd96524a88 Mon Sep 17 00:00:00 2001 From: Michael Lazos Date: Fri, 12 Sep 2025 07:15:10 +0000 Subject: [PATCH 165/693] [Dynamo] Don't guard data ptrs by default with mark_static_address (#162208) Fixes https://github.com/pytorch/pytorch/issues/156377 Since we now re-record cudagraphs, it's not necessary to guard by default anymore and induce a full recompile. 
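A minimal usage sketch of the new default (an illustration only, assuming a CUDA build and that the decorator is exposed via `torch._dynamo`; the semantics in the comments follow the updated docstring in the diff below):

```python
import torch
from torch._dynamo import mark_static_address

@torch.compile(mode="reduce-overhead")  # cudagraphs path
def f(x):
    return x * 2

x = torch.ones(2, 2, device="cuda")
# New default (guard=False): treat the address as constant for cudagraphs, but
# install no data_ptr guard; if the address changes, cudagraphs re-records
# instead of forcing a full recompile.
mark_static_address(x)

y = torch.ones(2, 2, device="cuda")
# Opt back into the old behavior: guard the data_ptr and fully recompile if it changes.
mark_static_address(y, guard=True)

f(x)
f(y)
```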
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162208 Approved by: https://github.com/anijain2305 --- test/inductor/test_compiled_optimizers.py | 2 +- torch/_dynamo/decorators.py | 11 ++++++----- torch/_dynamo/variables/optimizer.py | 8 ++++---- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/test/inductor/test_compiled_optimizers.py b/test/inductor/test_compiled_optimizers.py index c313348e93346..4c3d394b3e9f6 100644 --- a/test/inductor/test_compiled_optimizers.py +++ b/test/inductor/test_compiled_optimizers.py @@ -899,7 +899,7 @@ def test_get_value_on_static_address(self): compiled = torch.compile(_get_value) x = torch.ones(2, 2) - mark_static_address(x) + mark_static_address(x, guard=True) ret_val = compiled(x) diff --git a/torch/_dynamo/decorators.py b/torch/_dynamo/decorators.py index 8143a31608d57..52ea79dc30a48 100644 --- a/torch/_dynamo/decorators.py +++ b/torch/_dynamo/decorators.py @@ -752,12 +752,13 @@ def mark_static( @forbid_in_graph -def mark_static_address(t: Any, guard: bool = True) -> None: +def mark_static_address(t: Any, guard: bool = False) -> None: """ - Marks an input tensor whose data_ptr will not change across multiple calls - to a dynamo-compiled function. This indicates to cudagraphs that an extra allocation - is not needed for this input. The data_ptr will be guarded if guard=True. Note: - Tensors marked in this way will be kept alive until `torch._dynamo.reset()` is called. + Marks an input tensor whose address should be treated as constant across calls to the + same dynamo-compiled function. This indicates to cudagraphs that an extra allocation + is not needed for this input. The data_ptr will be guarded if guard=True, and cause a full + recompile if the data_ptr changes. Note: If this address changes, cudagraphs will re-record + if guard=False. 
""" if not isinstance(t, torch.Tensor): raise TypeError(f"mark_static_address expects a tensor but received {type(t)}") diff --git a/torch/_dynamo/variables/optimizer.py b/torch/_dynamo/variables/optimizer.py index 499c956843beb..776f7f34d9c37 100644 --- a/torch/_dynamo/variables/optimizer.py +++ b/torch/_dynamo/variables/optimizer.py @@ -147,7 +147,7 @@ def var_getattr(self, tx: "InstructionTranslator", name): for group in self.value.param_groups: for p in group["params"]: - mark_static_address(p) + mark_static_address(p, guard=True) self._set_capturable(tx) @@ -240,7 +240,7 @@ def map_sources_and_install_guards(self, tx): self.tensor_to_source = {} def mark_static(x): - mark_static_address(x) + mark_static_address(x, guard=True) tree_map_only(torch.Tensor, mark_static, self.value.state) @@ -348,14 +348,14 @@ def wrap_tensor(self, tx: "InstructionTranslator", tensor_value): if tensor_value in self.tensor_to_source: # mark these tensors as static for cudagraphs - mark_static_address(tensor_value) + mark_static_address(tensor_value, guard=True) source = self.tensor_to_source[tensor_value] self.static_tensor_names.add(tx.output.module_key_name(source.name())) elif tensor_value in self.grad_to_source: source = self.grad_to_source[tensor_value] else: # mark these tensors as static for cudagraphs - mark_static_address(tensor_value) + mark_static_address(tensor_value, guard=True) global_name = tx.store_global_weakref_by_id(GLOBAL_KEY_PREFIX, tensor_value) source = GlobalWeakRefSource(global_name) From 636a5110846f7b1797c570bf15eb50ff055c5e8e Mon Sep 17 00:00:00 2001 From: Shangdi Yu Date: Fri, 12 Sep 2025 07:31:01 +0000 Subject: [PATCH 166/693] [aoti] add config for libtorch free so (#162655) Users can specify the following to get a libtorch_free `.so`. "aot_inductor.use_libtorch": False, The following config is only used for torchnative (see https://github.com/meta-pytorch/torchnative/pull/110). It's not intended to be used by executorch. The reason we need it for torchnative is because a lot of the symbol definitions in torchnative repo is only in header files. "aot_inductor.libtorch_free_header": "/data/users/shangdiy/torchnative/standalone,/data/users/shangdiy/torchnative/" (or their custom headers) The main motivating use case is for executorch to produce a libtorch free `.so`. TODO for follow-up PR: this flag should be consolidated with the `compile_standalone` flag. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162655 Approved by: https://github.com/angelayi --- test/inductor/test_aot_inductor.py | 58 ++++++++++++++++++++++++++++++ torch/_inductor/config.py | 11 ++++++ torch/_inductor/cpp_builder.py | 35 +++++++++++------- torch/utils/cpp_extension.py | 24 ++++++++----- 4 files changed, 107 insertions(+), 21 deletions(-) diff --git a/test/inductor/test_aot_inductor.py b/test/inductor/test_aot_inductor.py index 3fcf4332c2257..9e98b18c5067d 100644 --- a/test/inductor/test_aot_inductor.py +++ b/test/inductor/test_aot_inductor.py @@ -2,6 +2,8 @@ import itertools import logging import os +import pathlib +import subprocess import sys import tempfile import unittest @@ -7091,6 +7093,62 @@ def forward(self, x): "RAIIAtenTensorHandle buf0(buf0_handle_restrided);" ).run(code) + @unittest.skipIf(IS_MACOS, "might have no readelf on Mac") + def test_libtorch_free_so(self): + class Model(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.linear = torch.nn.Linear(10, 10) + + def forward(self, x, y): + return x + self.linear(y) + + example_inputs = ( + torch.randn(10, 10, device=self.device), + torch.randn(10, 10, device=self.device), + ) + + model = Model().to(self.device) + ep = torch.export.export(model, example_inputs) + + package_path = torch._inductor.aoti_compile_and_package( + ep, + inductor_configs={ + "aot_inductor.link_libtorch": False, + }, + ) + + torch_libs = { + "libtorch.so", + "libc10.so", + "libtorch_cuda.so", + "libc10_cuda.so", + "libtorch_cpu.so", + } + + with tempfile.TemporaryDirectory() as tmpdir: + # Unpack + with zipfile.ZipFile(package_path, "r") as zf: + zf.extractall(tmpdir) + + so_files = list(pathlib.Path(tmpdir).rglob("*.so")) + self.assertTrue(len(so_files) > 0) + + for so_file in so_files: + so_copy = pathlib.Path(tmpdir) / f"{so_file.name}.checkcopy" + so_copy.write_bytes(so_file.read_bytes()) + + result = subprocess.run( + ["readelf", "-d", str(so_copy)], + check=True, + capture_output=True, + text=True, + ) + for line in result.stdout.splitlines(): + if "NEEDED" in line: + for lib in torch_libs: + self.assertTrue(lib not in line) + class AOTInductorLoggingTest(LoggingTestCase): @make_logging_test(dynamic=logging.DEBUG) diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index 32c915795967b..d08e8c0f02489 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -1580,6 +1580,17 @@ class aot_inductor: # Whether to enable link-time-optimization enable_lto = os.environ.get("AOT_INDUCTOR_ENABLE_LTO", "0") == "1" + # Whether the compiled .so should link to libtorch + # TODO: should consolidate this flag with compile_standalone + link_libtorch: bool = True + + # If None, the default torch headers such as torch/include + # will be used. Otherwise, the provided path will be used instead. + # This is needed for torchnative to load libtorch-free .so. + # Such as [f"{torchnative_dir}/standalone",f"{torchnative_dir}/",]. 
+ # TODO: should consolidate this flag with compile_standalone + libtorch_free_headers: Optional[list[str]] = None + class cuda: """Settings for cuda backend, today this consists of cutlass""" diff --git a/torch/_inductor/cpp_builder.py b/torch/_inductor/cpp_builder.py index e2cb445ed1080..ec1a0cde20843 100644 --- a/torch/_inductor/cpp_builder.py +++ b/torch/_inductor/cpp_builder.py @@ -1086,13 +1086,17 @@ def _get_torch_related_args( ) -> tuple[list[str], list[str], list[str]]: from torch.utils.cpp_extension import include_paths, TORCH_LIB_PATH - include_dirs = include_paths() - libraries_dirs = [TORCH_LIB_PATH] libraries = [] - if sys.platform != "darwin" and not config.is_fbcode(): - libraries = ["torch", "torch_cpu"] - if not aot_mode: - libraries.append("torch_python") + include_dirs = config.aot_inductor.libtorch_free_headers or include_paths() + + if config.aot_inductor.link_libtorch: + libraries_dirs = [TORCH_LIB_PATH] + if sys.platform != "darwin" and not config.is_fbcode(): + libraries.extend(["torch", "torch_cpu"]) + if not aot_mode: + libraries.append("torch_python") + else: + libraries_dirs = [] if _IS_WINDOWS: libraries.append("sleef") @@ -1562,21 +1566,26 @@ def get_cpp_torch_device_options( _set_gpu_runtime_env() from torch.utils import cpp_extension - include_dirs = cpp_extension.include_paths(device_type) - libraries_dirs = cpp_extension.library_paths(device_type) - if not config.is_fbcode(): + include_dirs = cpp_extension.include_paths( + device_type, config.aot_inductor.link_libtorch is None + ) + link_libtorch = config.aot_inductor.link_libtorch + libraries_dirs = cpp_extension.library_paths( + device_type, torch_include_dirs=link_libtorch + ) + if not config.is_fbcode() and link_libtorch: libraries += ["c10"] if device_type == "cuda": definitions.append(" USE_ROCM" if torch.version.hip else " USE_CUDA") if torch.version.hip is not None: - if config.is_fbcode(): + if config.is_fbcode() or not link_libtorch: libraries += ["amdhip64"] else: libraries += ["c10_hip", "torch_hip"] definitions.append(" __HIP_PLATFORM_AMD__") else: - if config.is_fbcode(): + if config.is_fbcode() or not link_libtorch: libraries += ["cuda"] else: libraries += ["c10_cuda", "cuda", "torch_cuda"] @@ -1689,7 +1698,9 @@ def __init__( device_libraries, device_passthrough_args, ) = get_cpp_torch_device_options( - device_type=device_type, aot_mode=aot_mode, compile_only=compile_only + device_type=device_type, + aot_mode=aot_mode, + compile_only=compile_only, ) _append_list(self._definitions, device_definitions) _append_list(self._include_dirs, device_include_dirs) diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py index 902d2fe6ce0f5..eb04ae4a23566 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -1496,7 +1496,7 @@ def SyclExtension(name, sources, *args, **kwargs): return setuptools.Extension(name, sources, *args, **kwargs) -def include_paths(device_type: str = "cpu") -> list[str]: +def include_paths(device_type: str = "cpu", torch_include_dirs=True) -> list[str]: """ Get the include paths required to build a C++ or CUDA or SYCL extension. @@ -1505,12 +1505,14 @@ def include_paths(device_type: str = "cpu") -> list[str]: Returns: A list of include path strings. """ + paths = [] lib_include = os.path.join(_TORCH_PATH, 'include') - paths = [ - lib_include, - # Remove this once torch/torch.h is officially no longer supported for C++ extensions. 
- os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'), - ] + if torch_include_dirs: + paths.extend([ + lib_include, + # Remove this once torch/torch.h is officially no longer supported for C++ extensions. + os.path.join(lib_include, 'torch', 'csrc', 'api', 'include'), + ]) if device_type == "cuda" and IS_HIP_EXTENSION: paths.append(os.path.join(lib_include, 'THH')) paths.append(_join_rocm_home('include')) @@ -1533,7 +1535,7 @@ def include_paths(device_type: str = "cpu") -> list[str]: return paths -def library_paths(device_type: str = "cpu") -> list[str]: +def library_paths(device_type: str = "cpu", torch_include_dirs=True) -> list[str]: """ Get the library paths required to build a C++ or CUDA extension. @@ -1543,8 +1545,12 @@ def library_paths(device_type: str = "cpu") -> list[str]: Returns: A list of library path strings. """ - # We need to link against libtorch.so - paths = [TORCH_LIB_PATH] + + paths = [] + + if torch_include_dirs: + # We need to link against libtorch.so + paths.extend([TORCH_LIB_PATH]) if device_type == "cuda" and IS_HIP_EXTENSION: lib_dir = 'lib' From 195ac549d7d6538c4212ca73f69488e990b9527d Mon Sep 17 00:00:00 2001 From: fduwjj Date: Thu, 11 Sep 2025 10:47:52 -0700 Subject: [PATCH 167/693] [DeviceMesh] Make CuTe layout as mesh layout to be ready for using in DeviceMesh (#162414) We create a wrapper class acting as a layout for device mesh so that we can add new methods more specific to DeviceMesh and keep the core logic of CuTe manipulation inside pycute module. This PR create the main body of the code and then next PR will come with actual implementation and unit test for device mesh layout. (Actual implementation can be found in https://github.com/pytorch/pytorch/pull/161016) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162414 Approved by: https://github.com/ezyang ghstack dependencies: #162413, #162534 --- torch/distributed/_mesh_layout.py | 75 +++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 torch/distributed/_mesh_layout.py diff --git a/torch/distributed/_mesh_layout.py b/torch/distributed/_mesh_layout.py new file mode 100644 index 0000000000000..6ada8e388cce1 --- /dev/null +++ b/torch/distributed/_mesh_layout.py @@ -0,0 +1,75 @@ +""" +Definition of CuTe inspired Layouts for DeviceMesh internal bookkeeping and functions to manipulate them +""" + +import math +from collections.abc import Iterator +from dataclasses import dataclass +from typing import TypeAlias + +from torch.distributed._pycute import ( + coalesce, + complement, + composition, + flatten, + is_tuple, + Layout, +) + + +NestedIntTuple: TypeAlias = tuple["int | NestedIntTuple", ...] 
+ + +@dataclass(frozen=True, init=True) +class _Layout(Layout): + shape: NestedIntTuple + stride: NestedIntTuple + + def __post_init__(self) -> None: + if not is_tuple(self.shape): + raise ValueError(f"shape must be a tuple, got {type(self.shape)}") + if not is_tuple(self.stride): + raise ValueError(f"stride must be a tuple, got {type(self.stride)}") + if len(flatten(self.shape)) != len(flatten(self.stride)): + raise ValueError( + f"sizes {len(flatten(self.shape))} and " + f"strides {len(flatten(self.stride))} must have the same length" + ) + + @property + def sizes(self) -> NestedIntTuple: + return self.shape + + @property + def strides(self) -> NestedIntTuple: + return self.stride + + @property + def sizes_and_strides(self) -> Iterator[tuple[int, int]]: + return zip(flatten(self.shape), flatten(self.stride)) # type: ignore[arg-type] + + def numel(self) -> int: + return math.prod(flatten(self.shape)) + + # operator [] (get-i like tuples) + def __getitem__(self, i: int) -> "_Layout": + size = self.sizes[i] + stride = self.strides[i] + if is_tuple(size) and is_tuple(stride): + return _Layout(size, stride) # type: ignore[arg-type] + elif isinstance(size, int) and isinstance(stride, int): + return _Layout((size,), (stride,)) + else: + raise ValueError("size and stride must be either int or tuple") + + def coalesce(self) -> "_Layout": + layout = coalesce(self) + return _Layout(layout.shape, layout.stride) # type: ignore[arg-type] + + def composition(self, layout: "_Layout") -> "_Layout": + result = composition(self, layout) + return _Layout(result.shape, result.stride) # type: ignore[arg-type] + + def complement(self, max_idx: int) -> "_Layout": + layout = complement(self, max_idx) + return _Layout(layout.shape, layout.stride) # type: ignore[arg-type] From 179f10621b418427fc6e92f58ea2b0bbe4cc9c52 Mon Sep 17 00:00:00 2001 From: "Liao, Wei" Date: Fri, 12 Sep 2025 07:57:29 +0000 Subject: [PATCH 168/693] port some distributed tensor test files for Intel GPU (#161703) it's another pr to port distributed tensor test for Intel GPU, while the other pr is https://github.com/pytorch/pytorch/pull/161604 We could enable Intel GPU with following methods and try the best to keep the original code styles: Use torch.accelerator for general gpu Skip the case if running on xpu which has known issues Pull Request resolved: https://github.com/pytorch/pytorch/pull/161703 Approved by: https://github.com/guangyey, https://github.com/d4l3k --- .../tensor/debug/test_comm_mode.py | 11 +++---- test/distributed/tensor/test_dtensor.py | 14 ++++----- .../tensor/test_dtensor_compile.py | 30 ++++++++++--------- test/distributed/tensor/test_redistribute.py | 4 +-- test/distributed/tensor/test_tensor_ops.py | 2 +- .../distributed/_tensor/common_dtensor.py | 2 +- 6 files changed, 32 insertions(+), 31 deletions(-) diff --git a/test/distributed/tensor/debug/test_comm_mode.py b/test/distributed/tensor/debug/test_comm_mode.py index c87164750c684..ca2d6b7c77ac1 100644 --- a/test/distributed/tensor/debug/test_comm_mode.py +++ b/test/distributed/tensor/debug/test_comm_mode.py @@ -6,7 +6,7 @@ import torch.nn as nn from torch.distributed.tensor import DeviceMesh, DTensor, Shard from torch.distributed.tensor.debug import CommDebugMode -from torch.testing._internal.common_distributed import requires_nccl +from torch.testing._internal.common_distributed import requires_accelerator_dist_backend from torch.testing._internal.common_utils import run_tests, TestCase from torch.testing._internal.distributed._tensor.common_dtensor import 
MLPModule from torch.testing._internal.distributed.fake_pg import FakeStore @@ -14,6 +14,7 @@ c10d_functional = torch.ops.c10d_functional c10d_ops = torch.ops.c10d +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" class TestCommMode(TestCase): @@ -28,7 +29,7 @@ def setUp(self): dist.init_process_group( backend="fake", rank=1, world_size=self.world_size, store=store ) - self.device_type = "cuda" if torch.cuda.is_available() else "cpu" + self.device_type = device_type self.world_pg = dist.distributed_c10d._get_default_group() def checksAssert(self, comm_mode, key, expected_value, expected_total_value): @@ -111,12 +112,12 @@ def f(x, y): self.assertEqual(comm_counts[c10d_functional.all_gather_into_tensor], 1) self.assertEqual(comm_counts[c10d_functional.reduce_scatter_tensor], 0) - @requires_nccl() + @requires_accelerator_dist_backend(["nccl", "xccl"]) def test_comm_mode_with_c10d(self): - if not torch.cuda.is_available(): + if not torch.accelerator.is_available(): return - inp = torch.rand(2, 8, 16).cuda() + inp = torch.rand(2, 8, 16).to(device_type) all_gather_out = inp.new_empty(self.world_size * 2, 8, 16) comm_mode = CommDebugMode() diff --git a/test/distributed/tensor/test_dtensor.py b/test/distributed/tensor/test_dtensor.py index 083f6d459c7e0..19225864e67d6 100644 --- a/test/distributed/tensor/test_dtensor.py +++ b/test/distributed/tensor/test_dtensor.py @@ -616,11 +616,11 @@ def sub_mesh_assert_equal(self, mesh, exp_in_mesh, exp_out_of_mesh, tensor): @with_comms def test_dtensor_device_mesh_device_conversion(self): - # construct a cuda device mesh + # construct a gpu device mesh mesh = self.build_device_mesh() - # construct from a cpu local tensor with cuda device mesh - # should automatically convert the dist tensor to cuda + # construct from a cpu local tensor with gpu device mesh + # should automatically convert the dist tensor to gpu placements = [Shard(0)] local_tensor = torch.randn(3, 3) dist_tensor = DTensor.from_local(local_tensor, mesh, placements) @@ -669,7 +669,7 @@ def test_dtensor_api_device_mesh_context_manager(self): @with_comms def test_dtensor_2d_mesh(self): mesh_tensor = torch.arange(self.world_size).reshape(2, 4) - # construct a cuda device mesh + # construct a gpu device mesh mesh = DeviceMesh(self.device_type, mesh_tensor) # construct a dist tensor on 2d device mesh and test if works @@ -691,7 +691,7 @@ def test_dtensor_2d_mesh(self): @with_comms def test_device_mesh_nd(self): - # construct a cuda device mesh + # construct a gpu device mesh mesh_tensor = torch.arange(self.world_size).reshape(2, 2, 2) mesh = DeviceMesh(self.device_type, mesh_tensor) # construct a dist tensor on 3d device mesh and test if works @@ -953,8 +953,8 @@ def _create_tensor(self, size): # Keep everything deterministic. 
torch.manual_seed(0) tensor = torch.rand(size) - if self.device_type == "cuda": - return tensor.cuda() + if self.device_type != "cpu": + return tensor.to(self.device_type) else: return tensor diff --git a/test/distributed/tensor/test_dtensor_compile.py b/test/distributed/tensor/test_dtensor_compile.py index fa4c217716b2c..4a94f50362349 100644 --- a/test/distributed/tensor/test_dtensor_compile.py +++ b/test/distributed/tensor/test_dtensor_compile.py @@ -39,6 +39,7 @@ RowwiseParallel, ) from torch.distributed.tensor.placement_types import _StridedShard +from torch.testing._internal.common_device_type import skipXPUIf from torch.testing._internal.common_distributed import skip_if_lt_x_gpu from torch.testing._internal.common_fsdp import get_devtype from torch.testing._internal.common_utils import ( @@ -47,8 +48,6 @@ run_tests, skipIfHpu, skipIfTorchDynamo, - TEST_CUDA, - TEST_HPU, ) from torch.testing._internal.distributed._tensor.common_dtensor import ( DTensorTestBase, @@ -95,6 +94,8 @@ def extract_graph(fx_g, _, graph_cell): partition_fn=min_cut_rematerialization_partition, ) +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + def _apply_sharding(mod: nn.Module, shard_dim: int, device_mesh: DeviceMesh): """ @@ -141,7 +142,7 @@ def tearDown(self): @property def device_type(self) -> str: - return "cuda" if TEST_CUDA else "hpu" if TEST_HPU else "cpu" + return device_type @property def world_size(self) -> int: @@ -160,9 +161,9 @@ def fn(x): res = fn(x) res.to_local().sum().backward() - @unittest.skipIf(not TEST_CUDA, "CUDA not available") + @unittest.skipIf(not torch.accelerator.is_available(), "accelerator not available") def test_dtensor_basic_export(self): - mesh = DeviceMesh("cuda", torch.arange(self.world_size)) + mesh = DeviceMesh(self.device_type, torch.arange(self.world_size)) param = torch.randn(4, 4) param_x = DTensor.from_local(param, mesh, [Shard(0)], run_check=False) @@ -188,10 +189,10 @@ def forward(self, x): ) self.assertExpectedInline( str(ep.graph_module.code).strip(), - """\ + f"""\ def forward(self, b_buffer, x): _assert_tensor_metadata_default = torch.ops.aten._assert_tensor_metadata.default(x, dtype = torch.float64, device = device(type='cpu'), layout = torch.strided); _assert_tensor_metadata_default = None - to = torch.ops.aten.to.dtype_layout(x, dtype = torch.float64, layout = torch.strided, device = device(type='cuda')); x = None + to = torch.ops.aten.to.dtype_layout(x, dtype = torch.float64, layout = torch.strided, device = device(type='{self.device_type}')); x = None view_as = torch.ops.aten.view_as.default(to, to); to = None dtensor___init__0 = self.dtensor___init__0 dtensor_const_func_spec0 = self.dtensor_const_func_spec0 @@ -206,10 +207,10 @@ def forward(self, b_buffer, x): # add is performed in _propagate_tensor_meta_non_cached, hence add_1 instead of add self.assertExpectedInline( str(ep.run_decompositions({}).graph_module.code).strip(), - """\ + f"""\ def forward(self, b_parametrizations_buffer_original0, x): _assert_tensor_metadata = torch.ops.aten._assert_tensor_metadata.default(x, None, None, torch.float64, device = device(type='cpu'), layout = torch.strided); _assert_tensor_metadata = None - _to_copy = torch.ops.aten._to_copy.default(x, dtype = torch.float64, layout = torch.strided, device = device(type='cuda', index=0)); x = None + _to_copy = torch.ops.aten._to_copy.default(x, dtype = torch.float64, layout = torch.strided, device = device(type='{self.device_type}', index=0)); x = None view = 
torch.ops.aten.view.default(_to_copy, [4, 4]); _to_copy = None add_1 = torch.ops.aten.add.Tensor(b_parametrizations_buffer_original0, view); b_parametrizations_buffer_original0 = view = None view_1 = torch.ops.aten.view.default(add_1, [4, 4]); add_1 = None @@ -339,6 +340,7 @@ def fn(x): self.assertEqual(res, ref) @skipIfHpu + @skipXPUIf(True, "https://github.com/intel/torch-xpu-ops/issues/1981") def test_dtensor_dynamic_loss_parallel_log_softmax(self): mesh = DeviceMesh(self.device_type, torch.arange(self.world_size)) @@ -714,13 +716,13 @@ def fn(x, y, z): out = layer_norm.permute(0, 2, 1) return out - x = torch.randn(4, 2, 4, requires_grad=True, device="cuda") + x = torch.randn(4, 2, 4, requires_grad=True, device=self.device_type) x_dt = DTensor.from_local(x, mesh, [Shard(1)], run_check=False) - y = torch.randn(4, requires_grad=True, device="cuda") + y = torch.randn(4, requires_grad=True, device=self.device_type) y_dt = DTensor.from_local(y, mesh, [Replicate()], run_check=False) - z = torch.randn(4, requires_grad=True, device="cuda") + z = torch.randn(4, requires_grad=True, device=self.device_type) z_dt = DTensor.from_local(z, mesh, [Replicate()], run_check=False) opt_fn = torch.compile(fn, backend="inductor", fullgraph=True) @@ -818,7 +820,7 @@ def test_dtensor_dynamo_device_mesh_attrs(self): # pass in tensor as inputs/outputs, create DTensor and run redistribute # (allgather collective) inside the fn def fn(x_dt): - if x_dt.device_mesh.device_type == "cuda": + if x_dt.device_mesh.device_type == f"{self.device_type}": return x_dt + 1 else: return x_dt + 2 @@ -947,7 +949,7 @@ def forward(self, input): model = FakeTransformer().to(self.device_type) - tp_mesh = init_device_mesh("cuda", (2,), mesh_dim_names=("tp",)) + tp_mesh = init_device_mesh(self.device_type, (2,), mesh_dim_names=("tp",)) # apply sequence parallel parallel_plan = { diff --git a/test/distributed/tensor/test_redistribute.py b/test/distributed/tensor/test_redistribute.py index fe07b0dd6a241..b385b92f960e6 100644 --- a/test/distributed/tensor/test_redistribute.py +++ b/test/distributed/tensor/test_redistribute.py @@ -19,8 +19,6 @@ instantiate_parametrized_tests, parametrize, run_tests, - TEST_CUDA, - TEST_HPU, ) from torch.testing._internal.distributed._tensor.common_dtensor import ( DTensorTestBase, @@ -519,7 +517,7 @@ def test_redistribute_shard_dim_change(self, dtype): local_out_dt = out_dt.to_local() local_expected_dt = expected_dt.to_local() self.assertEqual(out_dt.to_local(), expected_dt.to_local()) - if TEST_HPU or TEST_CUDA: + if torch.accelerator.is_available(): self.assertEqual( comm_mode.get_comm_counts()[ torch.ops._dtensor.shard_dim_alltoall diff --git a/test/distributed/tensor/test_tensor_ops.py b/test/distributed/tensor/test_tensor_ops.py index eaa1969068c1f..1e117353da28d 100644 --- a/test/distributed/tensor/test_tensor_ops.py +++ b/test/distributed/tensor/test_tensor_ops.py @@ -295,8 +295,8 @@ def test_zeros_like(self): self.assertEqual(dist_tensor.dtype, torch.float32) self.assertEqual(zeros_like_dt.dtype, torch.bfloat16) - @with_comms @skip_if_lt_x_gpu(4) + @with_comms def test_stack(self): mesh_2d = DeviceMesh( self.device_type, torch.arange(self.world_size).reshape(2, 2) diff --git a/torch/testing/_internal/distributed/_tensor/common_dtensor.py b/torch/testing/_internal/distributed/_tensor/common_dtensor.py index e25e08fbf5090..92a232fd9b0db 100644 --- a/torch/testing/_internal/distributed/_tensor/common_dtensor.py +++ b/torch/testing/_internal/distributed/_tensor/common_dtensor.py @@ -367,7 +367,7 @@ def 
device_type(self) -> str: @property def backend(self) -> str: - backend = dist.get_default_backend_for_device(DEVICE_TYPE) + backend = dist.get_default_backend_for_device(self.device_type) return backend def build_device_mesh(self) -> DeviceMesh: From 52af91e4c14e490c44944c3a210e618a9a69edf6 Mon Sep 17 00:00:00 2001 From: Aaryaman Vasishta Date: Fri, 12 Sep 2025 08:10:04 +0000 Subject: [PATCH 169/693] [ROCm/Windows] Support load_inline on windows (#162577) Supports `torch.utils.cpp_extension.load_inline` on Windows with ROCm. Tested on Windows with gfx1201. Note that it currently only works when CC and CXX are set to `clang-cl`. This is also needed when building extensions via. `setuptools` due to linker errors when using `cl` directly. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162577 Approved by: https://github.com/ezyang --- torch/utils/cpp_extension.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py index eb04ae4a23566..43caf5782fee5 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -2317,10 +2317,10 @@ def _prepare_ldflags(extra_ldflags, with_cuda, verbose, is_standalone): extra_ldflags.append('c10.lib') if with_cuda: - extra_ldflags.append('c10_cuda.lib') + extra_ldflags.append('c10_hip.lib' if IS_HIP_EXTENSION else 'c10_cuda.lib') extra_ldflags.append('torch_cpu.lib') if with_cuda: - extra_ldflags.append('torch_cuda.lib') + extra_ldflags.append('torch_hip.lib' if IS_HIP_EXTENSION else 'torch_cuda.lib') # /INCLUDE is used to ensure torch_cuda is linked against in a project that relies on it. # Related issue: https://github.com/pytorch/pytorch/issues/31611 extra_ldflags.append('-INCLUDE:?warp_size@cuda@at@@YAHXZ') @@ -2348,7 +2348,7 @@ def _prepare_ldflags(extra_ldflags, with_cuda, verbose, is_standalone): if with_cuda: if verbose: logger.info('Detected CUDA files, patching ldflags') - if IS_WINDOWS: + if IS_WINDOWS and not IS_HIP_EXTENSION: extra_ldflags.append(f'/LIBPATH:{_join_cuda_home("lib", "x64")}') extra_ldflags.append('cudart.lib') if CUDNN_HOME is not None: @@ -2365,8 +2365,12 @@ def _prepare_ldflags(extra_ldflags, with_cuda, verbose, is_standalone): if CUDNN_HOME is not None: extra_ldflags.append(f'-L{os.path.join(CUDNN_HOME, "lib64")}') elif IS_HIP_EXTENSION: - extra_ldflags.append(f'-L{_join_rocm_home("lib")}') - extra_ldflags.append('-lamdhip64') + if IS_WINDOWS: + extra_ldflags.append(f'/LIBPATH:{_join_rocm_home("lib")}') + extra_ldflags.append('amdhip64.lib') + else: + extra_ldflags.append(f'-L{_join_rocm_home("lib")}') + extra_ldflags.append('-lamdhip64') return extra_ldflags @@ -2693,16 +2697,20 @@ def _write_ninja_file_to_build_library(path, common_cflags += [f'-isystem {shlex.quote(include)}' for include in system_includes] if IS_WINDOWS: + COMMON_HIP_FLAGS.extend(['-fms-runtime-lib=dll']) cflags = common_cflags + ['/std:c++17'] + extra_cflags - cflags += COMMON_HIP_FLAGS if IS_HIP_EXTENSION else COMMON_MSVC_FLAGS + cflags += COMMON_MSVC_FLAGS + (COMMON_HIP_FLAGS if IS_HIP_EXTENSION else []) cflags = _nt_quote_args(cflags) else: cflags = common_cflags + ['-fPIC', '-std=c++17'] + extra_cflags if with_cuda and IS_HIP_EXTENSION: - cuda_flags = ['-DWITH_HIP'] + cflags + COMMON_HIP_FLAGS + COMMON_HIPCC_FLAGS + cuda_flags = ['-DWITH_HIP'] + common_cflags + extra_cflags + COMMON_HIP_FLAGS + COMMON_HIPCC_FLAGS + cuda_flags = cuda_flags + ['-std=c++17'] cuda_flags += _get_rocm_arch_flags(cuda_flags) cuda_flags += 
extra_cuda_cflags + if IS_WINDOWS: + cuda_flags = _nt_quote_args(cuda_flags) elif with_cuda: cuda_flags = common_cflags + COMMON_NVCC_FLAGS + _get_cuda_arch_flags(extra_cuda_cflags) if IS_WINDOWS: From 972e409829343cc2062aeee0994a9c1c735d216a Mon Sep 17 00:00:00 2001 From: Yuanyuan Chen Date: Fri, 12 Sep 2025 08:31:51 +0000 Subject: [PATCH 170/693] [Reland] Use std::string_view in torchgen (#158625) Reland of #157050, which is incidentally closed. Pull Request resolved: https://github.com/pytorch/pytorch/pull/158625 Approved by: https://github.com/albanD --- test/test_overrides.py | 2 -- tools/autograd/load_derivatives.py | 2 +- torch/csrc/utils/python_arg_parser.cpp | 1 - torchgen/api/python.py | 2 +- torchgen/api/types/types.py | 2 +- torchgen/api/types/types_base.py | 2 ++ torchgen/dest/lazy_ir.py | 6 +++--- torchgen/static_runtime/generator.py | 3 +-- 8 files changed, 9 insertions(+), 11 deletions(-) diff --git a/test/test_overrides.py b/test/test_overrides.py index 8454677856d0f..2a4f244bad11a 100644 --- a/test/test_overrides.py +++ b/test/test_overrides.py @@ -940,8 +940,6 @@ def _simple_type_parser(func, arg_name, arg_type): return None elif arg_type == "ScalarType": return torch.float32 - elif arg_type == "c10::string_view": - return "" elif arg_type in ("std::string_view", "::std::string_view"): return "" elif arg_type == "SymInt": diff --git a/tools/autograd/load_derivatives.py b/tools/autograd/load_derivatives.py index f61226f25fb90..28fddf0fb8513 100644 --- a/tools/autograd/load_derivatives.py +++ b/tools/autograd/load_derivatives.py @@ -969,7 +969,7 @@ def repl(m: re.Match[str]) -> str: if nctype.type == OptionalCType(BaseCType(stringT)): formula = re.sub( rf"\b{name}\b", - f"{name}.has_value() ? std::optional({name}.value()) : std::nullopt", + f"{name}.has_value() ? 
std::optional<::std::string_view>({name}.value()) : std::nullopt", formula, ) diff --git a/torch/csrc/utils/python_arg_parser.cpp b/torch/csrc/utils/python_arg_parser.cpp index 613657e03b926..d801c7f730b01 100644 --- a/torch/csrc/utils/python_arg_parser.cpp +++ b/torch/csrc/utils/python_arg_parser.cpp @@ -46,7 +46,6 @@ static std::unordered_map type_map = { {"DeviceIndex", ParameterType::INT64}, {"Stream", ParameterType::STREAM}, {"std::string", ParameterType::STRING}, - {"c10::string_view", ParameterType::STRING}, {"std::string_view", ParameterType::STRING}, {"::std::string_view", ParameterType::STRING}, {"Dimname", ParameterType::DIMNAME}, diff --git a/torchgen/api/python.py b/torchgen/api/python.py index dbfa730601630..0c5b9ad5e7b4d 100644 --- a/torchgen/api/python.py +++ b/torchgen/api/python.py @@ -683,7 +683,7 @@ def argument_type_str( elif t.name == BaseTy.float: return "double" elif t.name == BaseTy.str: - return "c10::string_view" + return "std::string_view" elif t.name in [ BaseTy.Tensor, BaseTy.bool, diff --git a/torchgen/api/types/types.py b/torchgen/api/types/types.py index 41c05653fffdf..97724384c2a20 100644 --- a/torchgen/api/types/types.py +++ b/torchgen/api/types/types.py @@ -52,7 +52,7 @@ float8_e4m3fnT = BaseCppType("at", "Float8_e4m3fn") float8_e4m3fnuzT = BaseCppType("at", "Float8_e4m3fnuz") float8_e8m0fnuT = BaseCppType("at", "Float8_e8m0fnu") -stringT = BaseCppType("c10", "string_view") +stringT = BaseCppType("std", "string_view") generatorT = BaseCppType("at", "Generator") scalarTypeT = BaseCppType("at", "ScalarType") tensorT = BaseCppType("at", "Tensor") diff --git a/torchgen/api/types/types_base.py b/torchgen/api/types/types_base.py index 08085fa0fa2bf..2288ebce71835 100644 --- a/torchgen/api/types/types_base.py +++ b/torchgen/api/types/types_base.py @@ -81,6 +81,8 @@ class BaseCType(CType): type: BaseCppType def cpp_type(self, *, strip_ref: bool = False) -> str: + if self.type.ns == "std": + return "::" + str(self.type) return str(self.type) def remove_const_ref(self) -> CType: diff --git a/torchgen/dest/lazy_ir.py b/torchgen/dest/lazy_ir.py index b912b8f2427f8..6231a36d5d460 100644 --- a/torchgen/dest/lazy_ir.py +++ b/torchgen/dest/lazy_ir.py @@ -256,7 +256,7 @@ def gen(self, schema: LazyIrSchema) -> list[str]: [ # This code is just special casing the mapping from string_view -> strings f"{a.name}({a.name}.has_value() ? 
::std::make_optional(std::string(*{a.name})) : ::std::nullopt)" - if a.lazy_type.cpp_type() == "::std::optional" + if a.lazy_type.cpp_type() == "::std::optional<::std::string_view>" else f"{a.name}({a.name})" for a in scalar_args ] @@ -266,9 +266,9 @@ def gen(self, schema: LazyIrSchema) -> list[str]: scalar_decls = "\n ".join( [ f"std::string {a.name};" - if a.lazy_type.cpp_type() == "c10::string_view" + if a.lazy_type.cpp_type() == "::std::string_view" else f"::std::optional {a.name};" - if a.lazy_type.cpp_type() == "::std::optional" + if a.lazy_type.cpp_type() == "::std::optional<::std::string_view>" else f"{a.lazy_type.cpp_type()} {a.name};" for a in scalar_args ] diff --git a/torchgen/static_runtime/generator.py b/torchgen/static_runtime/generator.py index 8ad2fd3c45889..a9814bd4dee1f 100644 --- a/torchgen/static_runtime/generator.py +++ b/torchgen/static_runtime/generator.py @@ -323,8 +323,7 @@ def ivalue_type_conversion_method( ), BaseTy.str: ( (False, "toStringView()"), - (False, "toOptional()"), - (False, "toOptional<::std::string_view>()"), + (False, "toOptional()"), ), } From 66c0f14eccbc8a170394caf6230091ddcb95e5c3 Mon Sep 17 00:00:00 2001 From: Miroslaw Oksiucik Date: Fri, 12 Sep 2025 08:32:00 +0000 Subject: [PATCH 171/693] Support XPU in --nproc-per-node option to torchrun (#159474) Support both --nproc-per-node=xpu and autodetection of XPU device in case of --nproc-per-node=auto Pull Request resolved: https://github.com/pytorch/pytorch/pull/159474 Approved by: https://github.com/tsocha, https://github.com/guangyey, https://github.com/d4l3k Co-authored-by: Yu, Guangye <106960996+guangyey@users.noreply.github.com> --- test/distributed/launcher/test_run.py | 21 ++++++++++++++++++++- torch/distributed/run.py | 21 +++++++++++---------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/test/distributed/launcher/test_run.py b/test/distributed/launcher/test_run.py index f71bffd527c1e..d271e60954ae7 100644 --- a/test/distributed/launcher/test_run.py +++ b/test/distributed/launcher/test_run.py @@ -273,10 +273,29 @@ def test_nproc_launch_unknown_configurations(self): ) @patch("torch.cuda.is_available", return_value=True) @patch("torch.cuda.device_count", return_value=3) - def test_nproc_gpu_launch_configurations(self, _mock1, _mock2): + @patch("torch.accelerator.is_available", return_value=True) + @patch("torch.accelerator.device_count", return_value=3) + @patch("torch.accelerator.current_accelerator", return_value=MagicMock(type="gpu")) + def test_nproc_gpu_launch_configurations( + self, _mock1, _mock2, _mock3, _mock4, _mock5 + ): self._test_nproc_launch_configuration("auto", 3) self._test_nproc_launch_configuration("gpu", 3) + @skip_but_pass_in_sandcastle_if( + TEST_WITH_DEV_DBG_ASAN, "test incompatible with dev/dbg asan" + ) + @patch("torch.xpu.is_available", return_value=True) + @patch("torch.xpu.device_count", return_value=3) + @patch("torch.accelerator.is_available", return_value=True) + @patch("torch.accelerator.device_count", return_value=3) + @patch("torch.accelerator.current_accelerator", return_value=MagicMock(type="xpu")) + def test_nproc_xpu_launch_configurations( + self, _mock1, _mock2, _mock3, _mock4, _mock5 + ): + self._test_nproc_launch_configuration("auto", 3) + self._test_nproc_launch_configuration("xpu", 3) + @skip_but_pass_in_sandcastle_if( TEST_WITH_DEV_DBG_ASAN, "test incompatible with dev/dbg asan" ) diff --git a/torch/distributed/run.py b/torch/distributed/run.py index d7bedb4335c24..ba8fd3dc69e58 100644 --- a/torch/distributed/run.py +++ 
b/torch/distributed/run.py @@ -77,7 +77,9 @@ .. note:: ``--nproc-per-node`` may be ``"gpu"`` (spawn one process per GPU), ``"cpu"`` (spawn one process per CPU), + ``"xpu"`` (spawn one process per XPU), ``"auto"`` (equivalent to ``"gpu"`` if CUDA is available, + else equivalent to ``"xpu"`` if XPU is available, else equivalent to ``"cpu"``), or an integer specifying the number of processes. See `torch.distributed.run.determine_local_world_size @@ -413,7 +415,7 @@ def get_args_parser() -> ArgumentParser: action=env, type=str, default="1", - help="Number of workers per node; supported values: [auto, cpu, gpu, int].", + help="Number of workers per node; supported values: [auto, cpu, gpu, xpu, int].", ) # @@ -705,21 +707,20 @@ def determine_local_world_size(nproc_per_node: str): raise ValueError("Cuda is not available.") from e device_type = "gpu" num_proc = torch.cuda.device_count() + elif nproc_per_node == "xpu": + if not torch.xpu.is_available(): + raise ValueError("Xpu is not available.") from e + device_type = "xpu" + num_proc = torch.xpu.device_count() elif nproc_per_node == torch._C._get_privateuse1_backend_name(): if not _get_custom_mod_func("is_available")(): raise ValueError(f"{nproc_per_node} is not available.") from e device_type = nproc_per_node num_proc = _get_custom_mod_func("device_count")() elif nproc_per_node == "auto": - if torch.cuda.is_available(): - num_proc = torch.cuda.device_count() - device_type = "gpu" - elif ( - hasattr(torch, torch._C._get_privateuse1_backend_name()) - and _get_custom_mod_func("is_available")() - ): - num_proc = _get_custom_mod_func("device_count")() - device_type = torch._C._get_privateuse1_backend_name() + if torch.accelerator.is_available(): + num_proc = torch.accelerator.device_count() + device_type = torch.accelerator.current_accelerator().type # type: ignore[union-attr] else: num_proc = os.cpu_count() device_type = "cpu" From 98e9440f30b797aa990b59115435a5ec3245448f Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 12 Sep 2025 08:36:17 +0000 Subject: [PATCH 172/693] [1/N] Port 5 _composable/fsdp distributed test cases to Intel GPU (#159118) For https://github.com/pytorch/pytorch/issues/114850, we will port distributed tests to Intel GPU. 
We could enable Intel GPU with following methods and try the best to keep the original code styles: - use "torch.accelerator.current_accelerator()" to determine the accelerator backend - enabled XPU for some test path - skip some test cases which Intel GPU does not support Pull Request resolved: https://github.com/pytorch/pytorch/pull/159118 Approved by: https://github.com/guangyey, https://github.com/d4l3k --- .../fsdp/test_fully_shard_clip_grad_norm_.py | 3 ++- .../_composable/fsdp/test_fully_shard_comm.py | 10 +++++++++- .../_composable/fsdp/test_fully_shard_compile.py | 2 +- .../_composable/fsdp/test_fully_shard_memory.py | 12 +++++++++--- .../fsdp/test_fully_shard_mixed_precision.py | 2 +- .../_composable/fsdp/test_fully_shard_training.py | 12 ++++++++---- 6 files changed, 30 insertions(+), 11 deletions(-) diff --git a/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py b/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py index 6c7a16608e195..87e056c02e562 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py @@ -12,7 +12,7 @@ from torch.distributed.tensor.debug import CommDebugMode from torch.testing._internal.common_distributed import skip_if_lt_x_gpu from torch.testing._internal.common_fsdp import FSDPTest, get_devtype, MLPStack -from torch.testing._internal.common_utils import run_tests +from torch.testing._internal.common_utils import run_tests, TEST_XPU, xfailIf from torch.testing._internal.distributed._tensor.common_dtensor import ( ModelArgs, Transformer, @@ -123,6 +123,7 @@ def world_size(self) -> int: return min(torch.get_device_module(device_type).device_count(), 4) @skip_if_lt_x_gpu(4) + @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1661 def test_clip_grad_norm_2d(self): for norm_type in (2, 1, 3, float("inf")): dp_size = 2 diff --git a/test/distributed/_composable/fsdp/test_fully_shard_comm.py b/test/distributed/_composable/fsdp/test_fully_shard_comm.py index c52c1e539ff6d..5ae26ae9b9766 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_comm.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_comm.py @@ -5,6 +5,7 @@ import itertools import os import tempfile +import unittest from typing import Callable, Optional, Union from unittest.mock import MagicMock @@ -54,7 +55,7 @@ patch_reshard, patch_unshard, ) -from torch.testing._internal.common_utils import run_tests +from torch.testing._internal.common_utils import run_tests, TEST_XPU, xfailIf from torch.testing._internal.distributed._tensor.common_dtensor import ( ModelArgs, Transformer, @@ -414,6 +415,7 @@ def test_manual_reshard_with_reshard_after_forward_false(self): ) @skip_if_lt_x_gpu(2) + @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1571 def test_set_reduce_scatter_divide_factor(self): self.run_subtests( {"divide_factor": [self.world_size * 2, self.world_size]}, @@ -1454,6 +1456,9 @@ def _run(cls, *args, **kwargs): # Test reduce-scatter only on plain FSDP on 2 GPUs @skip_if_lt_x_gpu(2) + @unittest.skipIf( + TEST_XPU, "Related environment variable is not supported with XCCL" + ) def test_fully_shard_force_sum_reduce_scatter(self): torch.manual_seed(42) model_args = ModelArgs() @@ -1506,6 +1511,9 @@ def test_fully_shard_force_sum_reduce_scatter(self): # Test both reduce-scatter and all-reduce on HSDP (DDP+FSDP) on 4 GPUs @skip_if_lt_x_gpu(4) + @unittest.skipIf( + TEST_XPU, "Related environment variable is not 
supported with XCCL" + ) def test_fully_shard_force_sum_both_reductions(self): mesh = init_device_mesh( device_type.type, (2, self.world_size // 2), mesh_dim_names=("ddp", "fsdp") diff --git a/test/distributed/_composable/fsdp/test_fully_shard_compile.py b/test/distributed/_composable/fsdp/test_fully_shard_compile.py index b64d4107ee0ca..630e20a2540fe 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_compile.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_compile.py @@ -133,7 +133,7 @@ def skipTestForOldSm(self): device_type.type, self.rank % torch.get_device_module(device_type).device_count(), ) - if not sm_is_or_higher_than(device, 8, 0): + if device_type.type == "cuda" and not sm_is_or_higher_than(device, 8, 0): self.skipTest("bf16 requires sm >= 8.0") def test_dynamo_trace_use_training_state(self): diff --git a/test/distributed/_composable/fsdp/test_fully_shard_memory.py b/test/distributed/_composable/fsdp/test_fully_shard_memory.py index 44d05ade98f75..eda7468c833da 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_memory.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_memory.py @@ -8,7 +8,12 @@ from torch.distributed.fsdp import CPUOffloadPolicy, fully_shard, OffloadPolicy from torch.testing._internal.common_distributed import skip_if_lt_x_gpu from torch.testing._internal.common_fsdp import FSDPTest, get_devtype -from torch.testing._internal.common_utils import run_tests, TEST_CUDA, TEST_HPU +from torch.testing._internal.common_utils import ( + run_tests, + TEST_CUDA, + TEST_HPU, + TEST_XPU, +) from torch.testing._internal.distributed._tensor.common_dtensor import ( ModelArgs, Transformer, @@ -236,14 +241,15 @@ def test_fully_shard_del_memory(self): def _get_peak_active_memory_mb(self) -> int: mem_stats = torch.get_device_module(device_type).memory_stats() - if TEST_CUDA: + + if TEST_CUDA or TEST_XPU: return round(mem_stats["active_bytes.all.peak"] / 1e6) if TEST_HPU: return round(mem_stats["MaxInUse"] / 1e6) def _get_curr_active_memory_mb(self) -> int: mem_stats = torch.get_device_module(device_type).memory_stats() - if TEST_CUDA: + if TEST_CUDA or TEST_XPU: return round(mem_stats["active_bytes.all.current"] / 1e6) if TEST_HPU: return round(mem_stats["InUse"] / 1e6) diff --git a/test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py b/test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py index af25d4f35fd1e..212420c784516 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_mixed_precision.py @@ -614,7 +614,7 @@ def forward(self, input: Input): torch.bfloat16, torch.bfloat16, torch.bfloat16, True ) model = Model() - inp = Input(torch.randn(2, 10).cuda()) + inp = Input(torch.randn(2, 10).to(device_type)) fully_shard(model, mp_policy=mp_policy) loss = model(inp).sum() diff --git a/test/distributed/_composable/fsdp/test_fully_shard_training.py b/test/distributed/_composable/fsdp/test_fully_shard_training.py index 3991fda639108..e7e262cb1d6a3 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_training.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_training.py @@ -42,7 +42,9 @@ get_cycles_per_ms, run_tests, TEST_HPU, + TEST_XPU, wrapSwapTensorsTest, + xfailIf, ) from torch.testing._internal.distributed._tensor.common_dtensor import ( ModelArgs, @@ -324,7 +326,7 @@ def _shard_placement_fn(param: nn.Parameter) -> Optional[Shard]: self.assertEqual(losses[0], losses[1]) @skip_if_lt_x_gpu(2) - 
@unittest.skipIf(TEST_HPU, "Sleep kernel not supported for HPU") + @unittest.skipIf(TEST_HPU or TEST_XPU, "Sleep kernel not supported for HPU/XPU") @compiled_fsdp_test(compile_compute_on_module=Transformer) def test_train_parity_multi_group(self): """ @@ -347,7 +349,7 @@ def test_train_parity_multi_group(self): ) @skip_if_lt_x_gpu(2) - @unittest.skipIf(TEST_HPU, "sleep kernel not supported on HPU") + @unittest.skipIf(TEST_HPU or TEST_XPU, "sleep kernel not supported on HPU/XPU") def test_train_parity_multi_group_cpu_offload_eager(self): """ Tests train parity against DDP when using multiple parameter groups for @@ -371,7 +373,7 @@ def test_train_parity_multi_group_cpu_offload_eager(self): ) @skip_if_lt_x_gpu(2) - @unittest.skipIf(TEST_HPU, "sleep kernel not supported on HPU") + @unittest.skipIf(TEST_HPU or TEST_XPU, "sleep kernel not supported on HPU/XPU") @compiled_fsdp_test(compile_compute_on_module=Transformer) def test_train_parity_multi_group_unshard_async_op(self): """ @@ -495,6 +497,7 @@ def delayed_reduce_scatter(*args, **kwargs): self.assertEqual(losses[0], losses[1]) @skip_if_lt_x_gpu(2) + @unittest.skipIf(TEST_XPU, "Sleep is not supported on XPU") def test_non_root_forward_backward(self): """ Tests running forward/backward through the root and then through a @@ -625,7 +628,7 @@ def test_explicit_prefetching(self): self.assertEqual(losses[0], losses[1]) @skip_if_lt_x_gpu(2) - @unittest.skipIf(TEST_HPU, "Sleep is not supported on HPU") + @unittest.skipIf(TEST_HPU or TEST_XPU, "Sleep is not supported on HPU/XPU") def test_post_optim_event(self): torch.manual_seed(42) model_args = ModelArgs(dropout_p=0.0) @@ -678,6 +681,7 @@ def world_size(self) -> int: @skip_if_lt_x_gpu(2) @compiled_fsdp_test(compile_compute_on_module=Transformer) + @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1661 def test_train_parity_with_activation_checkpointing(self): """ Tests train parity against DDP when composing with activation From a7bbc5fea73853c1133a2b9beb930485efd88912 Mon Sep 17 00:00:00 2001 From: Blaine Burton Rister <145300525+blaine-rister@users.noreply.github.com> Date: Fri, 12 Sep 2025 08:41:47 +0000 Subject: [PATCH 173/693] [Inductor-FX] Support ScatterFallback (#162686) # Problem Inductor has a `ScatterFallback` op with custom Python and C++ wrapper codegen macros. This is used in certain situations where the default Triton codegen doesn't apply, and especially for reductions which need to be deterministic. Since this op used direct Python/C++ codegen, it wasn't compatible with the FX backend. # Feature This PR refactors the associated wrapper codegen to support `ScatterFallback`. This follows the same basic steps that were used for other fallback ops including `MultiOutput` and `ExternKernel`: 1. Create a new wrapper IR op called `ScatterFallbackLine`. Move the logic in `ScatterFallback.cogeden` to `ScatterFallbackLine.codegen`, to prevent it from affecting the FX backend. This logic is unsafe for FX because it may generate Python or C++ strings with methods like `codegen_reference()`. 2. To eleminate the dependence on `V.graph`, move language-specific logic to the respective wrapper codegen subclasses. In this case, C++ codegen has some special logic, which is moved to `CppWrapperCpu`. 3. Create a new method in `FXWrapperCodegen` to handle `ScatterFallbackLine`. # Test plan Added a couple of CI tests for the FX backend with scatter fallbacks. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162686 Approved by: https://github.com/jansel --- test/inductor/test_fxir_backend.py | 45 +++++++++++++++++++ torch/_inductor/codegen/cpp_wrapper_cpu.py | 12 ++++- .../codegen/cpp_wrapper_cpu_array_ref.py | 10 +++-- torch/_inductor/codegen/wrapper.py | 32 ++++++++++++- torch/_inductor/codegen/wrapper_fxir.py | 21 +++++++++ torch/_inductor/ir.py | 23 +--------- 6 files changed, 116 insertions(+), 27 deletions(-) diff --git a/test/inductor/test_fxir_backend.py b/test/inductor/test_fxir_backend.py index a8c4030af3201..d2f274f2e412c 100644 --- a/test/inductor/test_fxir_backend.py +++ b/test/inductor/test_fxir_backend.py @@ -25,6 +25,7 @@ from torch._inductor.test_case import TestCase as InductorTestCase from torch.export import Dim from torch.testing._internal.common_utils import ( + DeterministicGuard, instantiate_parametrized_tests, parametrize, ) @@ -567,6 +568,50 @@ def compile_module(*inps): self.assertTrue(same(ref, result)) + def test_scatter_fallback_scalar_src(self): + """ + Test a special case where ScatterFallback takes a scalar 'src' argument. + """ + + def foo(input_): + dim = 0 + src = 1.5 + return torch.ops.aten.scatter(input_, dim, index, src) + + length = 8 + index = torch.randint(length, (length,), device=self.device) + input_ = torch.randn(length, device=self.device) + with DeterministicGuard(True): + (gm,) = self._compile_and_check( + foo, + (input_,), + ) + + # Check for the fallback op. + num_fallback = self._count_ops(gm, torch.ops.aten.scatter_.value) + self.assertEqual(num_fallback, 1) + + def test_scatter_reduce_fallback(self): + """ + Test the customized wrapper codegen for ScatterFallback ops. + """ + fallback_op = torch.ops.aten.scatter_reduce_.two + + def foo(out, index, src): + dim = 0 + out = fallback_op(out, dim, index, src, reduce="amax", include_self=False) + return out + 1 + + length = 8 + out, src = [torch.randn(length, device=self.device) for _ in range(2)] + index = torch.randint(length, (length,), device=self.device) + (gm,) = self._compile_and_check( + foo, (out, index, src), expected_num_triton_kernels=2 + ) + + # Check for the fallback. 
+ self.assertEqual(self._count_ops(gm, fallback_op), 1) + @torch._inductor.config.patch("graph_partition", True) def test_subgraph_raises(self): """ diff --git a/torch/_inductor/codegen/cpp_wrapper_cpu.py b/torch/_inductor/codegen/cpp_wrapper_cpu.py index 83d1d0614674b..d9be6cc71eb61 100644 --- a/torch/_inductor/codegen/cpp_wrapper_cpu.py +++ b/torch/_inductor/codegen/cpp_wrapper_cpu.py @@ -1398,7 +1398,15 @@ def _generate_extern_kernel_out_helper( kernel, args, device, debug_handle=debug_handle ) - def generate_scatter_fallback( + def _get_scatter_reduce_enum(self, reduce): + # Follow aten/src/ATen/native/ReductionType.h:get_operator_enum + get_operator_enum = {"add": "sum", "multiply": "prod"} + if reduce in get_operator_enum: + reduce = get_operator_enum[reduce] + + return reduce + + def _generate_scatter_fallback( self, output, inputs, @@ -1408,6 +1416,8 @@ def generate_scatter_fallback( reduce, kwargs, ): + reduce = self._get_scatter_reduce_enum(reduce) + # call the ABI shim function instead of the ATen one cpp_kernel_name = self.get_c_shim_func_name(cpp_kernel_name, self.device) # TODO: consider remove "_out" and add missing inplace variants to fallback_ops.py diff --git a/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py b/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py index 63c5bc2debe8b..086a9bc37a6d5 100644 --- a/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py +++ b/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py @@ -694,7 +694,12 @@ def generate_c_shim_extern_kernel_call( kernel, wrapped_args, device, debug_args=args ) - def generate_scatter_fallback( + def generate_scatter_fallback(self, node: ir.ScatterFallback): + # No stack allocation when there is a fallback op + self.allow_stack_allocation = False + super().generate_scatter_fallback(node) + + def _generate_scatter_fallback( self, output, inputs, @@ -704,8 +709,7 @@ def generate_scatter_fallback( reduce, kwargs, ): - # No stack allocation when there is a fallback op - self.allow_stack_allocation = False + reduce = self._get_scatter_reduce_enum(reduce) # call the ABI shim function instead of the ATen one cpp_kernel_name = self.get_c_shim_func_name(cpp_kernel_name, self.device) diff --git a/torch/_inductor/codegen/wrapper.py b/torch/_inductor/codegen/wrapper.py index 53037195b0577..6154df7dccf31 100644 --- a/torch/_inductor/codegen/wrapper.py +++ b/torch/_inductor/codegen/wrapper.py @@ -908,6 +908,33 @@ def codegen_fx(self, converter: FxConverter) -> FxConversionFunc: return converter._generate_multi_output +@dataclasses.dataclass +class ScatterFallbackLine(WrapperLine): + wrapper: PythonWrapperCodegen + node: ir.ScatterFallback + + def codegen(self, code: IndentedBuffer) -> None: + node = self.node + assert ir.is_node_sequence(node.inputs) + if node.src_is_tensor: + (x, index, src) = (t.codegen_reference() for t in node.inputs) + else: + (x, index) = (t.codegen_reference() for t in node.inputs) + src = node.constant_args[1] + self.wrapper._generate_scatter_fallback( + x, + [x, node.constant_args[0], index, src], + node.cpp_kernel_name, + node.python_kernel_name, + node.src_is_tensor, + node.kwargs["reduce"], + node.codegen_kwargs(), + ) + + def codegen_fx(self, converter: FxConverter) -> FxConversionFunc: + return converter._generate_scatter_fallback + + @dataclasses.dataclass class SymbolicCallArgLine(WrapperLine): wrapper: PythonWrapperCodegen @@ -1511,7 +1538,10 @@ def generate_tma_descriptor(self, desc): line = f"{desc.name} = {call}{self.ending}" self.writeline(line) - def generate_scatter_fallback( + 
def generate_scatter_fallback(self, node: ir.ScatterFallback): + self.writeline(ScatterFallbackLine(self, node)) + + def _generate_scatter_fallback( self, output, inputs, diff --git a/torch/_inductor/codegen/wrapper_fxir.py b/torch/_inductor/codegen/wrapper_fxir.py index 29905b11f3b97..133e307096453 100644 --- a/torch/_inductor/codegen/wrapper_fxir.py +++ b/torch/_inductor/codegen/wrapper_fxir.py @@ -63,6 +63,7 @@ PythonWrapperCodegen, ReinterpretLine, ReuseLine, + ScatterFallbackLine, SymbolicCallArg, SymbolicCallArgLine, WrapperLine, @@ -653,6 +654,26 @@ def _generate_multi_output(self, line: WrapperLine) -> None: node.name = line.result_name self.buffer_to_node[line.result_name] = node + def _generate_scatter_fallback(self, line: WrapperLine) -> None: + assert isinstance(line, ScatterFallbackLine) + ir_node = line.node + assert ir.is_node_sequence(ir_node.inputs) + (x, index, src) = [self._generate_buffer(t) for t in ir_node.inputs] + ( + [] if ir_node.src_is_tensor else [ir_node.constant_args[1]] + ) + args = (x, ir_node.constant_args[0], index, src) + kwargs = {} + if reduce := ir_node.kwargs.get("reduce"): + kwargs["reduce"] = reduce + + fx_node = self.gm.graph.call_function( + ir_node.op_overload, # type: ignore[arg-type] + args=args, + kwargs=kwargs, + ) + result_buffer = ir_node.codegen_reference() + self.buffer_to_node[result_buffer] = fx_node + def _generate_null(self, line: WrapperLine) -> None: assert isinstance(line, NullLine) # Does nothing. diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index a380cd930fc9d..427ca54544a31 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -7060,28 +7060,7 @@ class ScatterFallback(ExternKernel): """ def codegen(self, wrapper: PythonWrapperCodegen) -> None: - reduce = self.kwargs["reduce"] - if V.graph.cpp_wrapper: - # Follow aten/src/ATen/native/ReductionType.h:get_operator_enum - get_operator_enum = {"add": "sum", "multiply": "prod"} - if reduce in get_operator_enum: - reduce = get_operator_enum[reduce] - - assert is_node_sequence(self.inputs) - if self.src_is_tensor: - (x, index, src) = (t.codegen_reference() for t in self.inputs) - else: - (x, index) = (t.codegen_reference() for t in self.inputs) - src = self.constant_args[1] - wrapper.generate_scatter_fallback( - x, - [x, self.constant_args[0], index, src], - self.cpp_kernel_name, - self.python_kernel_name, - self.src_is_tensor, - reduce, - self.codegen_kwargs(), - ) + wrapper.generate_scatter_fallback(self) def should_allocate(self) -> bool: return False From 6c334885d48725197b5d35e2c1543efc0f4198d0 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Fri, 12 Sep 2025 10:54:39 +0000 Subject: [PATCH 174/693] [RELAND] Always build USE_DISTRIBUTED (#160449) and Make distributed modules importable even when backend not built (#159889) (#162594) Summary: Original: D81957844 and D81957923 Also, https://github.com/pytorch/pytorch/pull/162142 is patched in as well #buildall Test Plan: sandcastle and oss ci Rollback Plan: Reviewed By: H-Huang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162594 Approved by: https://github.com/H-Huang, https://github.com/dcci --- .ci/pytorch/macos-build.sh | 7 +- .ci/pytorch/macos-test.sh | 4 + .ci/wheel/build_wheel.sh | 3 +- BUILD.bazel | 3 +- CMakeLists.txt | 12 +- buckbuild.bzl | 4 +- c10/ovrsource_defs.bzl | 4 +- caffe2/CMakeLists.txt | 144 +++++----- cmake/Dependencies.cmake | 2 +- cmake/Summary.cmake | 12 +- docs/source/conf.py | 7 - test/cpp/dist_autograd/CMakeLists.txt | 2 +- test/distributed/tensor/test_fake.py | 41 
+++ test/export/test_export.py | 10 +- test/test_numa_binding.py | 5 +- tools/build_pytorch_libs.py | 3 +- torch/CMakeLists.txt | 50 ++-- torch/_C/_distributed_c10d.pyi | 9 + torch/csrc/Exceptions.h | 2 - torch/csrc/Module.cpp | 10 - torch/csrc/autograd/functions/init.cpp | 4 - torch/csrc/distributed/c10d/HashStore.cpp | 1 - torch/csrc/distributed/c10d/Work.cpp | 2 +- torch/csrc/distributed/c10d/init.cpp | 1 + torch/csrc/inductor/aoti_torch/shim_cpu.cpp | 4 - torch/csrc/jit/python/pybind_utils.h | 6 +- .../csrc/jit/python/python_sugared_value.cpp | 3 +- torch/csrc/jit/runtime/interpreter.h | 14 +- torch/csrc/jit/serialization/pickler.h | 2 - torch/csrc/jit/serialization/unpickler.h | 2 - .../standalone/execution_trace_observer.cpp | 9 - torch/csrc/profiler/util.cpp | 6 +- torch/csrc/profiler/util.h | 2 - torch/distributed/_C_stubs.py | 150 ++++++++++ torch/distributed/__init__.py | 258 +++++++++--------- torch/distributed/_dist2.py | 2 +- torch/distributed/_distributed_c10d.py | 245 +++++++++++++++++ torch/distributed/_functional_collectives.py | 12 +- .../_shard/sharded_tensor/reshard.py | 2 +- .../chunk_sharding_spec_ops/embedding_bag.py | 2 +- .../distributed/_symmetric_memory/__init__.py | 22 +- .../_symmetric_memory/_nvshmem_triton.py | 2 +- torch/distributed/_tools/fake_collectives.py | 4 +- .../algorithms/model_averaging/utils.py | 4 - torch/distributed/constants.py | 15 +- torch/distributed/device_mesh.py | 44 +-- torch/distributed/distributed_c10d.py | 70 +++-- torch/distributed/elastic/control_plane.py | 2 +- torch/distributed/nn/functional.py | 4 - torch/distributed/rpc/__init__.py | 2 +- torch/distributed/tensor/_collective_utils.py | 4 +- .../testing/_internal/distributed/fake_pg.py | 2 +- 52 files changed, 778 insertions(+), 458 deletions(-) create mode 100644 test/distributed/tensor/test_fake.py create mode 100644 torch/distributed/_C_stubs.py create mode 100644 torch/distributed/_distributed_c10d.py diff --git a/.ci/pytorch/macos-build.sh b/.ci/pytorch/macos-build.sh index d7447e7d48582..d41c3c08e6288 100755 --- a/.ci/pytorch/macos-build.sh +++ b/.ci/pytorch/macos-build.sh @@ -35,11 +35,10 @@ fi print_cmake_info if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then - # Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls - USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel + USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel else - # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests - # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448 + # NB: we always build with distributed; USE_DISTRIBUTED turns off all + # backends (specifically the gloo backend), so test that this case works too USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64 fi if which sccache > /dev/null; then diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index a859901191e03..79d47da431712 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -13,9 +13,13 @@ if [[ ! 
$(python -c "import torch; print(int(torch.backends.openmp.is_available( fi popd +python -mpip install -r requirements.txt + # enable debug asserts in serialization export TORCH_SERIALIZATION_DEBUG=1 +python -mpip install --no-input -r requirements.txt + setup_test_python() { # The CircleCI worker hostname doesn't resolve to an address. # This environment variable makes ProcessGroupGloo default to diff --git a/.ci/wheel/build_wheel.sh b/.ci/wheel/build_wheel.sh index 2d5f4d30b4c82..98b50c0ceeafe 100755 --- a/.ci/wheel/build_wheel.sh +++ b/.ci/wheel/build_wheel.sh @@ -177,7 +177,8 @@ source ~/${desired_python}-build/bin/activate retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements.txt" retry brew install libomp -# For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule +# For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which +# is build as part of tensorpipe submodule export USE_DISTRIBUTED=1 export USE_MKLDNN=OFF diff --git a/BUILD.bazel b/BUILD.bazel index d4202e7a2c1e4..635f39eed2cee 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -22,7 +22,6 @@ COMMON_COPTS = [ "-DHAVE_SHM_UNLINK=1", "-D_FILE_OFFSET_BITS=64", "-DUSE_FBGEMM", - "-DUSE_DISTRIBUTED", "-DAT_PER_OPERATOR_HEADERS", "-DATEN_THREADING=NATIVE", "-DNO_CUDNN_DESTROY_HANDLE", @@ -811,7 +810,7 @@ cc_library( name = "torch_python", srcs = libtorch_python_core_sources + if_cuda(libtorch_python_cuda_sources) - + if_cuda(libtorch_python_distributed_sources) + + libtorch_python_distributed_sources + GENERATED_AUTOGRAD_PYTHON, hdrs = glob([ "torch/csrc/generic/*.cpp", diff --git a/CMakeLists.txt b/CMakeLists.txt index efad5419aaffa..f3e4b28bcff98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,8 +181,9 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le)") set(CPU_POWER ON) endif() -# For non-supported platforms, turn USE_DISTRIBUTED off by default. It is not -# tested and likely won't work without additional changes. +# For non-supported platforms, turn USE_DISTRIBUTED off by default. +# NB: USE_DISTRIBUTED simply disables the backend; distributed code +# still gets built if(NOT LINUX AND NOT WIN32) set(USE_DISTRIBUTED OFF @@ -262,11 +263,11 @@ option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF) option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF) option(USE_NATIVE_ARCH "Use -march=native" OFF) cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF) -option(USE_DISTRIBUTED "Use distributed" ON) +option(USE_DISTRIBUTED "Enable default distributed backends" ON) cmake_dependent_option(USE_NCCL "Use NCCL" ON "USE_DISTRIBUTED;USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF) cmake_dependent_option(USE_XCCL "Use XCCL" ON - "USE_XPU;UNIX;NOT APPLE" OFF) + "USE_DISTRIBUTED;USE_XPU;UNIX;NOT APPLE" OFF) cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF) cmake_dependent_option(USE_RCCL "Use RCCL" ON "USE_NCCL;NOT WIN32" OFF) cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF) @@ -431,11 +432,10 @@ if(WIN32) PATH_SUFFIXES lib NO_DEFAULT_PATH) if(NOT libuv_tmp_LIBRARY) - set(USE_DISTRIBUTED OFF) set(USE_GLOO OFF) message( WARNING - "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. " + "Libuv is not installed in current conda env. Set USE_GLOO to OFF. " "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv." 
) else() diff --git a/buckbuild.bzl b/buckbuild.bzl index e079d98395441..3e3af13f9118a 100644 --- a/buckbuild.bzl +++ b/buckbuild.bzl @@ -156,7 +156,7 @@ ROOT = "//" if IS_OSS else "//xplat/caffe2" # for targets in subfolders ROOT_PATH = "//" if IS_OSS else "//xplat/caffe2/" -C10 = "//c10:c10" if IS_OSS else "//xplat/caffe2/c10:c10" +C10 = "//c10:c10" if IS_OSS else ("//xplat/caffe2/c10:c10_ovrsource" if is_arvr_mode() else "//xplat/caffe2/c10:c10") # a dictionary maps third party library name to fbsource and oss target THIRD_PARTY_LIBS = { @@ -948,6 +948,7 @@ def define_buck_targets( [ ("torch/csrc/api/include", "torch/**/*.h"), ("", "torch/csrc/**/*.h"), + ("", "torch/csrc/**/*.hpp"), ("", "torch/nativert/**/*.h"), ("", "torch/headeronly/**/*.h"), ("", "torch/script.h"), @@ -2033,6 +2034,7 @@ def define_buck_targets( ("", "caffe2/utils/*.h"), ("", "caffe2/core/*.h"), ("", "torch/csrc/*.h"), + ("", "torch/csrc/*.hpp"), ("", "torch/csrc/api/include/torch/*.h"), ("", "torch/csrc/autograd/*.h"), ("", "torch/csrc/autograd/*/*.h"), diff --git a/c10/ovrsource_defs.bzl b/c10/ovrsource_defs.bzl index aafe5a4de8c42..532404f21bbaf 100644 --- a/c10/ovrsource_defs.bzl +++ b/c10/ovrsource_defs.bzl @@ -18,9 +18,9 @@ cuda_supported_platforms = [ def define_c10_ovrsource(name, is_mobile): if is_mobile: - pp_flags = ["-DC10_MOBILE=1"] + pp_flags = ["-DC10_MOBILE=1", "-DC10_USE_GLOG"] else: - pp_flags = [] + pp_flags = ["-DC10_USE_GLOG"] oxx_static_library( name = name, diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 99d4b2cd5aa93..b5d47bb4b5dff 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -540,11 +540,9 @@ if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER) ${TORCH_SRC_DIR}/csrc/utils/byte_order.cpp ) - if(USE_DISTRIBUTED) - append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) - if(NOT WIN32) - append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) - endif() + append_filelist("libtorch_distributed_base_sources" TORCH_SRCS) + if(NOT WIN32) + append_filelist("libtorch_distributed_extra_sources" TORCH_SRCS) endif() endif() @@ -573,32 +571,30 @@ if(USE_CUDA) list(APPEND Caffe2_GPU_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) endif() - if(USE_DISTRIBUTED) - append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS) - if(NOT WIN32) - append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) - set_source_files_properties( - ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/utils.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp - PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" - ) - endif() + append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_GPU_SRCS) + if(NOT WIN32) + append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_GPU_SRCS) + set_source_files_properties( + ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/utils.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp + 
${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CudaDMAConnectivity.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryOps.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/NCCLSymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/symm_mem/cuda_mem_pool.cpp + PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" + ) + endif() - set(ASYNC_MM_FILE "${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/AsyncMM.cu") - # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9 - if(CMAKE_COMPILER_IS_GNUCXX) - set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable") - endif() - if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*") - set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a") - endif() + set(ASYNC_MM_FILE "${TORCH_SRC_DIR}/csrc/distributed/c10d/cuda/AsyncMM.cu") + # Disable the warning to make cutlass warp-specialized cooperative kernel build for gcc-9 + if(CMAKE_COMPILER_IS_GNUCXX) + set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-Wno-unused-but-set-variable") + endif() + if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_NVCC_FLAGS MATCHES ".*compute_90.*") + set_source_files_properties(${ASYNC_MM_FILE} PROPERTIES COMPILE_FLAGS "-gencode arch=compute_90a,code=sm_90a") endif() set_source_files_properties( ${TORCH_ROOT}/aten/src/ATen/cuda/detail/LazyNVRTC.cpp @@ -631,11 +627,9 @@ if(USE_ROCM) list(APPEND Caffe2_HIP_SRCS ${TORCH_SRC_DIR}/csrc/cuda/nccl.cpp) endif() - if(USE_DISTRIBUTED) - append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) - if(NOT WIN32) - append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) - endif() + append_filelist("libtorch_cuda_distributed_base_sources" Caffe2_HIP_SRCS) + if(NOT WIN32) + append_filelist("libtorch_cuda_distributed_extra_sources" Caffe2_HIP_SRCS) endif() # caffe2_nvrtc's stubs to driver APIs are useful for HIP. 
# See NOTE [ ATen NVRTC Stub and HIP ] @@ -1356,12 +1350,10 @@ if(BUILD_TEST) add_subdirectory(${TORCH_ROOT}/test/cpp/jit ${CMAKE_BINARY_DIR}/test_jit) add_subdirectory(${TORCH_ROOT}/test/cpp/nativert ${CMAKE_BINARY_DIR}/test_nativert) add_subdirectory(${TORCH_ROOT}/test/inductor ${CMAKE_BINARY_DIR}/test_inductor) - if(USE_DISTRIBUTED) - add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) - if(NOT WIN32) - add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) - add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) - endif() + add_subdirectory(${TORCH_ROOT}/test/cpp/c10d ${CMAKE_BINARY_DIR}/test_cpp_c10d) + if(NOT WIN32) + add_subdirectory(${TORCH_ROOT}/test/cpp/dist_autograd ${CMAKE_BINARY_DIR}/dist_autograd) + add_subdirectory(${TORCH_ROOT}/test/cpp/rpc ${CMAKE_BINARY_DIR}/test_cpp_rpc) endif() if(NOT NO_API) add_subdirectory(${TORCH_ROOT}/test/cpp/api ${CMAKE_BINARY_DIR}/test_api) @@ -1466,47 +1458,41 @@ if(BUILD_LITE_INTERPRETER) endif() endif() - -# Pass USE_DISTRIBUTED to torch_cpu, as some codes in jit/pickler.cpp and -# jit/unpickler.cpp need to be compiled only when USE_DISTRIBUTED is set -if(USE_DISTRIBUTED) - target_compile_definitions(torch_cpu PUBLIC USE_DISTRIBUTED) - if(USE_GLOO AND USE_C10D_GLOO) - target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) - endif() - if(USE_UCC AND USE_C10D_UCC) - target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) - if(USE_CUDA) - target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) - endif() - endif() - if(USE_NCCL AND USE_C10D_NCCL) - if(USE_ROCM) - target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) - else() - target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) - endif() - endif() - if(USE_MPI AND USE_C10D_MPI) - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set_source_files_properties( - "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" - PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) - endif() - target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) - endif() - # Pass USE_RPC in order to reduce use of - # #if defined(USE_DISTRIBUTED) && !defined(_WIN32) - # need to be removed when RPC is supported - if(NOT WIN32) - target_compile_definitions(torch_cpu PUBLIC USE_RPC) +if(USE_GLOO AND USE_C10D_GLOO) + target_compile_definitions(torch_cpu PUBLIC USE_C10D_GLOO) +endif() +if(USE_UCC AND USE_C10D_UCC) + target_compile_definitions(torch_cpu PUBLIC USE_C10D_UCC) + if(USE_CUDA) + target_compile_definitions(torch_cuda PUBLIC USE_C10D_UCC) endif() - # Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp - # can only be compiled with USE_TENSORPIPE is set. 
- if(USE_TENSORPIPE) - target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) +endif() +if(USE_NCCL AND USE_C10D_NCCL) + if(USE_ROCM) + target_compile_definitions(torch_hip PUBLIC USE_C10D_NCCL) + else() + target_compile_definitions(torch_cuda PUBLIC USE_C10D_NCCL) endif() endif() +if(USE_MPI AND USE_C10D_MPI) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set_source_files_properties( + "${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupMPI.cpp" + PROPERTIES COMPILE_FLAGS -Wno-deprecated-declarations) + endif() + target_compile_definitions(torch_cpu PUBLIC USE_C10D_MPI) +endif() +# Pass USE_RPC in order to reduce use of +# #if defined(USE_DISTRIBUTED) && !defined(_WIN32) +# need to be removed when RPC is supported +if(NOT WIN32) + target_compile_definitions(torch_cpu PUBLIC USE_RPC) +endif() +# Pass USE_TENSORPIPE to torch_cpu as some parts of rpc/utils.cpp +# can only be compiled with USE_TENSORPIPE is set. +if(USE_TENSORPIPE) + target_compile_definitions(torch_cpu PUBLIC USE_TENSORPIPE) +endif() if(NOT INTERN_BUILD_MOBILE) if(${CAFFE2_LINK_LOCAL_PROTOBUF}) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 6ad56d3b9b44e..08ffdaf8cf451 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1134,7 +1134,7 @@ if(USE_CUDA AND CUDA_VERSION VERSION_LESS 13.0) include_directories(SYSTEM ${CUB_INCLUDE_DIRS}) endif() -if(USE_DISTRIBUTED AND USE_TENSORPIPE) +if(USE_TENSORPIPE) if(MSVC) message(WARNING "Tensorpipe cannot be used on Windows.") else() diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index ffd4b5298a890..fb64e99bccf22 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -192,13 +192,11 @@ function(caffe2_print_configuration_summary) message(STATUS " USE_PYTORCH_QNNPACK : ${USE_PYTORCH_QNNPACK}") message(STATUS " USE_XNNPACK : ${USE_XNNPACK}") message(STATUS " USE_DISTRIBUTED : ${USE_DISTRIBUTED}") - if(${USE_DISTRIBUTED}) - message(STATUS " USE_MPI : ${USE_MPI}") - message(STATUS " USE_GLOO : ${USE_GLOO}") - message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}") - message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}") - message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}") - endif() + message(STATUS " USE_MPI : ${USE_MPI}") + message(STATUS " USE_GLOO : ${USE_GLOO}") + message(STATUS " USE_GLOO_WITH_OPENSSL : ${USE_GLOO_WITH_OPENSSL}") + message(STATUS " USE_GLOO_IBVERBS : ${USE_GLOO_IBVERBS}") + message(STATUS " USE_TENSORPIPE : ${USE_TENSORPIPE}") if(NOT "${SELECTED_OP_LIST}" STREQUAL "") message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") endif() diff --git a/docs/source/conf.py b/docs/source/conf.py index 44ad4de8115f6..d1504757f9c54 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -3333,13 +3333,6 @@ def coverage_post_process(app, exception): if not isinstance(app.builder, CoverageBuilder): return - if not torch.distributed.is_available(): - raise RuntimeError( - "The coverage tool cannot run with a version " - "of PyTorch that was built with USE_DISTRIBUTED=0 " - "as this module's API changes." 
- ) - # These are all the modules that have "automodule" in an rst file # These modules are the ones for which coverage is checked # Here, we make sure that no module is missing from that list diff --git a/test/cpp/dist_autograd/CMakeLists.txt b/test/cpp/dist_autograd/CMakeLists.txt index 14fd7f7ae9a2b..86a6c924288bb 100644 --- a/test/cpp/dist_autograd/CMakeLists.txt +++ b/test/cpp/dist_autograd/CMakeLists.txt @@ -1,4 +1,4 @@ -if(USE_DISTRIBUTED AND NOT WIN32) +if(NOT WIN32) set(DIST_AUTOGRAD_TEST_DIR "${TORCH_ROOT}/test/cpp/dist_autograd") set(DIST_AUTOGRAD_TEST_SOURCES ${TORCH_ROOT}/test/cpp/common/main.cpp diff --git a/test/distributed/tensor/test_fake.py b/test/distributed/tensor/test_fake.py new file mode 100644 index 0000000000000..099c6e87f5f18 --- /dev/null +++ b/test/distributed/tensor/test_fake.py @@ -0,0 +1,41 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates +# Owner(s): ["oncall: distributed"] + +import torch +from torch._subclasses.fake_tensor import FakeTensorMode +from torch.distributed.tensor import DTensor +from torch.distributed.tensor.placement_types import Shard +from torch.testing._internal.common_utils import run_tests, TestCase +from torch.testing._internal.distributed.fake_pg import FakeStore + + +class TestFakeDTensor(TestCase): + def test_fake_dtensor_operations(self): + # Use FakeTensorMode to handle CUDA tensors without actual CUDA + fake_mode = FakeTensorMode() + world_size = 4 + + fake_store = FakeStore() + torch.distributed.init_process_group( + "fake", store=fake_store, rank=0, world_size=world_size + ) + device_mesh = torch.distributed.device_mesh.init_device_mesh( + "cuda", + (2, world_size // 2), + ) + + # Create fake CUDA tensor using FakeTensorMode + with fake_mode: + x = torch.randn(1, 1, device="cuda") + x = DTensor.from_local(x, device_mesh, [Shard(0), Shard(1)]) + + # Test basic DTensor operations + self.assertIsInstance(x, DTensor) + + # Test sum operation + r = x.sum(1) + self.assertIsInstance(r, DTensor) + + +if __name__ == "__main__": + run_tests() diff --git a/test/export/test_export.py b/test/export/test_export.py index b4596eab95baf..2c466f162a893 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -60,10 +60,7 @@ from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.symbolic_shapes import ShapeEnv from torch.testing import FileCheck -from torch.testing._internal.common_cuda import ( - PLATFORM_SUPPORTS_FLASH_ATTENTION, - xfailIfDistributedNotSupported, -) +from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FLASH_ATTENTION from torch.testing._internal.common_utils import ( find_library_location, IS_FBCODE, @@ -15772,7 +15769,6 @@ def distributed_env(self, world_size): finally: torch.distributed.destroy_process_group() - @xfailIfDistributedNotSupported def test_distributed_all_reduce(self): class Foo(torch.nn.Module): def __init__(self): @@ -15790,7 +15786,6 @@ def forward(self, x): inp = (torch.randn(4, 4),) self.assertTrue(torch.allclose(ep.module()(*inp), m(*inp))) - @xfailIfDistributedNotSupported def test_distributed_all_gather(self): class Foo(torch.nn.Module): def forward(self, x): @@ -15806,7 +15801,6 @@ def forward(self, x): torch.allclose(a, b) for a, b in zip(ep.module()(*inp), m(*inp)) ) - @xfailIfDistributedNotSupported def test_distributed_all_gather_into_tensor(self): class Foo(torch.nn.Module): def forward(self, x): @@ -15820,7 +15814,6 @@ def forward(self, x): inp = (torch.randn(2),) self.assertTrue(torch.allclose(ep.module()(*inp), m(*inp))) - 
@xfailIfDistributedNotSupported @testing.expectedFailureCppRuntime def test_distributed_all_to_all_single(self): class Foo(torch.nn.Module): @@ -15838,7 +15831,6 @@ def forward(self, x): ) self.assertEqual(len(nodes), 1) - @xfailIfDistributedNotSupported @testing.expectedFailureCppRuntime def test_distributed_reduce_scatter_tensor(self): class Foo(torch.nn.Module): diff --git a/test/test_numa_binding.py b/test/test_numa_binding.py index 764156ff9b98a..d38032ba22603 100644 --- a/test/test_numa_binding.py +++ b/test/test_numa_binding.py @@ -7,7 +7,7 @@ from dataclasses import dataclass from multiprocessing.context import SpawnProcess from typing import Any, Optional -from unittest import skipUnless +from unittest import skipIf, skipUnless from unittest.mock import mock_open, patch import torch @@ -22,7 +22,7 @@ AffinityMode, NumaOptions, ) -from torch.testing._internal.common_utils import run_tests, TestCase +from torch.testing._internal.common_utils import IS_MACOS, run_tests, TestCase @dataclass(frozen=True) @@ -680,6 +680,7 @@ def test_core_complex_tiebreak_prefers_lower_cache_key(self) -> None: set(range(0, 2)), ) + @skipIf(IS_MACOS, "sched_getaffinity doesn't exist") def test_binds_to_node_0_if_node_stored_as_minus_one(self) -> None: self._add_mock_hardware( num_sockets=1, diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py index 9d43de80f1298..457b224354fb2 100644 --- a/tools/build_pytorch_libs.py +++ b/tools/build_pytorch_libs.py @@ -88,8 +88,7 @@ def build_pytorch( ) -> None: my_env = _create_build_env() if ( - not check_negative_env_flag("USE_DISTRIBUTED") - and not check_negative_env_flag("USE_CUDA") + not check_negative_env_flag("USE_CUDA") and not check_negative_env_flag("USE_NCCL") and not check_env_flag("USE_SYSTEM_NCCL") ): diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt index 866c40ad1c12e..adc9aad4a05c3 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -276,32 +276,30 @@ add_custom_command( WORKING_DIRECTORY "${TORCH_ROOT}" ) -if(USE_DISTRIBUTED) - if(WIN32) - append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS) - else() - append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS) - endif() - # Disable certain warnings for GCC-9.X - if(CMAKE_COMPILER_IS_GNUCXX) - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") - endif() - # NCCL is a private dependency of libtorch, but libtorch_python includes - # some private headers of libtorch, which in turn include NCCL. As a hacky - # alternative to making NCCL a public dependency of libtorch, we make it - # a private dependency of libtorch_python as well. - if(USE_NCCL) - list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl) - endif() - # Same for MPI. 
- if(USE_MPI) - list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX) - endif() - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D) +if(WIN32) + append_filelist("libtorch_python_distributed_core_sources" TORCH_PYTHON_SRCS) +else() + append_filelist("libtorch_python_distributed_sources" TORCH_PYTHON_SRCS) endif() +# Disable certain warnings for GCC-9.X +if(CMAKE_COMPILER_IS_GNUCXX) + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/autograd/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/rpc/testing/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") + set_source_files_properties(${TORCH_SRC_DIR}/csrc/distributed/c10d/init.cpp PROPERTIES COMPILE_FLAGS "-Wno-cast-function-type") +endif() +# NCCL is a private dependency of libtorch, but libtorch_python includes +# some private headers of libtorch, which in turn include NCCL. As a hacky +# alternative to making NCCL a public dependency of libtorch, we make it +# a private dependency of libtorch_python as well. +if(USE_NCCL) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES __caffe2_nccl) +endif() +# Same for MPI. +if(USE_MPI) + list(APPEND TORCH_PYTHON_LINK_LIBRARIES MPI::MPI_CXX) +endif() +list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_C10D) if(USE_NCCL AND NOT WIN32) list(APPEND TORCH_PYTHON_SRCS @@ -369,10 +367,6 @@ if(BUILD_LIBTORCHLESS) target_compile_definitions(torch_python PRIVATE USE_C10D_NCCL) endif() - if(USE_DISTRIBUTED) - target_compile_definitions(torch_python PRIVATE USE_DISTRIBUTED) - endif() - if(USE_MPI AND USE_C10D_MPI) target_compile_definitions(torch_python PRIVATE USE_C10D_MPI) endif() diff --git a/torch/_C/_distributed_c10d.pyi b/torch/_C/_distributed_c10d.pyi index ad3d8e3abf245..79e437063b8cb 100644 --- a/torch/_C/_distributed_c10d.pyi +++ b/torch/_C/_distributed_c10d.pyi @@ -851,3 +851,12 @@ class ProcessGroupXCCL(Backend): def _set_process_group(pg: ProcessGroup) -> None: ... def _current_process_group() -> ProcessGroup: ... +def _dump_nccl_trace_json( + includeCollectives: Optional[bool] = ..., + onlyActive: Optional[bool] = ..., +) -> bytes: ... +def _dump_nccl_trace( + includeCollectives: Optional[bool] = ..., + includeStackTraces: Optional[bool] = ..., + onlyActive: Optional[bool] = ..., +) -> bytes: ... 
diff --git a/torch/csrc/Exceptions.h b/torch/csrc/Exceptions.h index 60a7bb644df01..d43d2b02a23ef 100644 --- a/torch/csrc/Exceptions.h +++ b/torch/csrc/Exceptions.h @@ -15,9 +15,7 @@ #include #include -#if defined(USE_DISTRIBUTED) #include -#endif inline void PyErr_SetString(PyObject* type, const std::string& message) { PyErr_SetString(type, message.c_str()); diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index 675a4c4310052..ac2b03d2651cc 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -120,14 +120,10 @@ #endif #endif -#ifdef USE_DISTRIBUTED -#ifdef USE_C10D #include #include #include #include -#endif -#endif #if defined(USE_VALGRIND) #include @@ -552,11 +548,7 @@ static PyObject* THPModule_getBackcompatKeepdimWarn( } static PyObject* THPModule_hasDistributed(PyObject* _unused, PyObject* noargs) { -#ifdef USE_DISTRIBUTED Py_RETURN_TRUE; -#else - Py_RETURN_FALSE; -#endif } static PyObject* THPModule_showConfig(PyObject* module, PyObject* noargs) { @@ -1993,7 +1985,6 @@ PyObject* initModule() { #ifdef USE_XPU THPUtils_addPyMethodDefs(methods, THXPModule_methods()); #endif -#if defined(USE_DISTRIBUTED) && defined(USE_C10D) THPUtils_addPyMethodDefs( methods, torch::distributed::c10d::python_functions()); #ifndef _WIN32 @@ -2003,7 +1994,6 @@ PyObject* initModule() { methods, torch::distributed::autograd::python_functions()); THPUtils_addPyMethodDefs( methods, torch::distributed::rpc::testing::python_functions()); -#endif #endif static struct PyModuleDef torchmodule = { diff --git a/torch/csrc/autograd/functions/init.cpp b/torch/csrc/autograd/functions/init.cpp index 5e19010f9ae3c..05c8901e1f60d 100644 --- a/torch/csrc/autograd/functions/init.cpp +++ b/torch/csrc/autograd/functions/init.cpp @@ -8,9 +8,7 @@ #include #include #include -#ifdef USE_DISTRIBUTED #include -#endif #include #include #include @@ -150,11 +148,9 @@ void THPAutograd_initFunctions() { static PyTypeObject CopyBackwardsClass; addClass(module, CopyBackwardsClass, "CopyBackwards"); -#ifdef USE_DISTRIBUTED static PyTypeObject SendRpcBackwardClass; addClass( module, SendRpcBackwardClass, "SendRpcBackward"); -#endif static PyTypeObject CopySlicesClass; addClass(module, CopySlicesClass, "CopySlices"); diff --git a/torch/csrc/distributed/c10d/HashStore.cpp b/torch/csrc/distributed/c10d/HashStore.cpp index 15befd9ec34e2..1055afc4847d0 100644 --- a/torch/csrc/distributed/c10d/HashStore.cpp +++ b/torch/csrc/distributed/c10d/HashStore.cpp @@ -1,6 +1,5 @@ #include -#include #include #include diff --git a/torch/csrc/distributed/c10d/Work.cpp b/torch/csrc/distributed/c10d/Work.cpp index cdec9185ce537..2c1ee42727d8a 100644 --- a/torch/csrc/distributed/c10d/Work.cpp +++ b/torch/csrc/distributed/c10d/Work.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index 7e79fef8392f6..128fab6593b37 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -46,6 +46,7 @@ #include #include #include + #include #include diff --git a/torch/csrc/inductor/aoti_torch/shim_cpu.cpp b/torch/csrc/inductor/aoti_torch/shim_cpu.cpp index b1c864bf3fbba..a610685fe9557 100644 --- a/torch/csrc/inductor/aoti_torch/shim_cpu.cpp +++ b/torch/csrc/inductor/aoti_torch/shim_cpu.cpp @@ -1,7 +1,5 @@ -#ifdef USE_DISTRIBUTED #include -#endif #include #include @@ -533,7 +531,6 @@ AOTITorchError aoti_torch_cpu__weight_int4pack_mm_cpu_tensor( }); } -#ifdef USE_DISTRIBUTED AOTITorchError 
aoti_torch_cpu__c10d_functional_all_reduce_( AtenTensorHandle inp, const char* reduce_op, @@ -566,4 +563,3 @@ AOTITorchError aoti_torch_cpu__c10d_functional_wait_tensor( *ret0 = new_tensor_handle(std::move(tmp_result)); }); } -#endif diff --git a/torch/csrc/jit/python/pybind_utils.h b/torch/csrc/jit/python/pybind_utils.h index 5ae84e3e0c68b..2c0c1ea4b9cf2 100644 --- a/torch/csrc/jit/python/pybind_utils.h +++ b/torch/csrc/jit/python/pybind_utils.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -24,10 +26,6 @@ #include #include #include -#ifdef USE_DISTRIBUTED -#include -#include -#endif #include #include diff --git a/torch/csrc/jit/python/python_sugared_value.cpp b/torch/csrc/jit/python/python_sugared_value.cpp index 8b16e089aa50e..808fe7d3605ba 100644 --- a/torch/csrc/jit/python/python_sugared_value.cpp +++ b/torch/csrc/jit/python/python_sugared_value.cpp @@ -1225,7 +1225,7 @@ std::shared_ptr toSugaredValue( } else if (obj.ptr() == py::module::import("torch").attr("_check").ptr()) { return std::make_shared(); #ifdef USE_RPC - // RPC module is only available when build flag "USE_DISTRIBUTED" is on. + // This is not defined on WINDOWS } else if ( isRpcAvailable && obj.ptr() == @@ -1238,7 +1238,6 @@ std::shared_ptr toSugaredValue( return SpecialFormValue::create(prim::rpc_sync); } else if ( isRpcAvailable && - // RPC module is only available when build flag "USE_DISTRIBUTED" is on. obj.ptr() == py::module::import("torch.distributed.rpc").attr("remote").ptr()) { return SpecialFormValue::create(prim::rpc_remote); diff --git a/torch/csrc/jit/runtime/interpreter.h b/torch/csrc/jit/runtime/interpreter.h index 6ae9f52a0cda2..be582cfb7cdd8 100644 --- a/torch/csrc/jit/runtime/interpreter.h +++ b/torch/csrc/jit/runtime/interpreter.h @@ -128,13 +128,8 @@ struct InterpreterContinuation { std::optional tls_state = std::nullopt) : state(std::move(state_)), stack(std::move(stack_)), - tls_state_(std::move(tls_state)) -#ifdef USE_DISTRIBUTED - , - dist_autograd_context_id_(dist_autograd_context_id) -#endif - { - } + tls_state_(std::move(tls_state)), + dist_autograd_context_id_(dist_autograd_context_id) {} void operator()(); @@ -142,9 +137,10 @@ struct InterpreterContinuation { InterpreterState state; Stack stack; std::optional tls_state_ = std::nullopt; -#ifdef USE_DISTRIBUTED - int64_t dist_autograd_context_id_; +#ifndef USE_RPC + [[maybe_unused]] #endif + int64_t dist_autograd_context_id_; }; // what is the tensors type, including state from the current execution context diff --git a/torch/csrc/jit/serialization/pickler.h b/torch/csrc/jit/serialization/pickler.h index 526c840bc10e8..e3379f4de65ac 100644 --- a/torch/csrc/jit/serialization/pickler.h +++ b/torch/csrc/jit/serialization/pickler.h @@ -79,9 +79,7 @@ class TORCH_API Pickler { void pushTuple(const IValue& ivalue); void pushString(const std::string& string); void pushDevice(const IValue& ivalue); -#ifdef USE_DISTRIBUTED void pushRRef(const IValue& ivalue); -#endif // unmemoized version void pushStringImpl(const std::string& string); void pushStorageOfTensor(const at::Tensor& tensor); diff --git a/torch/csrc/jit/serialization/unpickler.h b/torch/csrc/jit/serialization/unpickler.h index 702a1d8816e7f..208cf554ad2bb 100644 --- a/torch/csrc/jit/serialization/unpickler.h +++ b/torch/csrc/jit/serialization/unpickler.h @@ -140,9 +140,7 @@ class TORCH_API Unpickler { void rebuildParameter(); void rebuildTensorFromTypeV2(); void rebuildSparseTensor(); -#ifdef USE_DISTRIBUTED void rebuildRRef(); -#endif 
PickleOpCode readInstruction(); PickleOpCode readOpCode() { return static_cast(read()); diff --git a/torch/csrc/profiler/standalone/execution_trace_observer.cpp b/torch/csrc/profiler/standalone/execution_trace_observer.cpp index 1c88e80d4021c..e46c141cd3f4d 100644 --- a/torch/csrc/profiler/standalone/execution_trace_observer.cpp +++ b/torch/csrc/profiler/standalone/execution_trace_observer.cpp @@ -30,15 +30,12 @@ #include #include -#ifdef USE_DISTRIBUTED #include -#endif // USE_DISTRIBUTED using namespace at; // Collective property attributes // https://github.com/pytorch/pytorch/issues/124674 -#ifdef USE_DISTRIBUTED constexpr auto kETCommsName = "collective_name"; constexpr auto kETInMsgNelems = "in_msg_nelems"; constexpr auto kETOutMsgNelems = "out_msg_nelems"; @@ -49,7 +46,6 @@ constexpr auto kETGlobalRankStride = "global_rank_stride"; constexpr auto kETGroupSize = "pg_size"; constexpr auto kETProcessGroupName = "pg_name"; constexpr auto kETProcessGroupDesc = "pg_desc"; -#endif // USE_DISTRIBUTED namespace torch::profiler::impl { @@ -269,7 +265,6 @@ static std::ofstream openOutputFile(const std::string& name) { return stream; } -#ifdef USE_DISTRIBUTED static std::string getAttrJson( const std::string& name, const std::string& type, @@ -282,7 +277,6 @@ static std::string getAttrJson( type, value); } -#endif static void writeJsonNode( std::ofstream& out, @@ -660,7 +654,6 @@ static void handleKernelBackendInfo( inline std::string getCommsNodeAttrs(const RecordFunction& fn) { // NOLINT std::vector attrs; -#ifdef USE_DISTRIBUTED // We rely on paramcommsdebug object that is available in thread local info auto debugInfo = dynamic_cast( c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PARAM_COMMS_INFO)); @@ -704,8 +697,6 @@ inline std::string getCommsNodeAttrs(const RecordFunction& fn) { // NOLINT addAttr(kGroupSize, kETGroupSize, "uint64"); -#endif // USE_DISTRIBUTED - // XXX consider using as string stream? return attrs.empty() ? 
"" : fmt::format(", {}", fmt::join(attrs, ", ")); } diff --git a/torch/csrc/profiler/util.cpp b/torch/csrc/profiler/util.cpp index 0b2979e6fb7ea..e97699a99fd1c 100644 --- a/torch/csrc/profiler/util.cpp +++ b/torch/csrc/profiler/util.cpp @@ -11,9 +11,7 @@ #ifdef USE_KINETO #include #endif -#ifdef USE_DISTRIBUTED #include -#endif // USE_DISTRIBUTED namespace torch::profiler::impl { @@ -455,7 +453,7 @@ std::unordered_map saveNcclMeta( // @lint-ignore CLANGTIDY const SaveNcclMetaConfig& config) { std::unordered_map map; -#ifdef USE_DISTRIBUTED +#if !defined(BUILD_LITE_INTERPRETER) && !defined(C10_MOBILE) auto debugInfo = dynamic_cast( c10::ThreadLocalDebugInfo::get(c10::DebugInfoKind::PARAM_COMMS_INFO)); @@ -565,7 +563,7 @@ std::unordered_map saveNcclMeta( } } } -#endif // USE_DISTRIBUTED +#endif // !defined(BUILD_LITE_INTERPRETER) && !defined(C10_MOBILE) return map; } diff --git a/torch/csrc/profiler/util.h b/torch/csrc/profiler/util.h index f2ae57fa0e591..dcb4b866a2de3 100644 --- a/torch/csrc/profiler/util.h +++ b/torch/csrc/profiler/util.h @@ -185,7 +185,6 @@ struct HashCombine { } }; -#ifdef USE_DISTRIBUTED constexpr auto kCommsName = "Collective name"; constexpr auto kDtype = "dtype"; constexpr auto kInMsgNelems = "In msg nelems"; @@ -203,6 +202,5 @@ constexpr auto kP2pSrc = "Src Rank"; constexpr auto kP2pDst = "Dst Rank"; constexpr auto kInTensorsStart = "Input Tensors start"; constexpr auto kOutTensorsStart = "Output Tensors start"; -#endif // USE_DISTRIBUTED } // namespace torch::profiler::impl diff --git a/torch/distributed/_C_stubs.py b/torch/distributed/_C_stubs.py new file mode 100644 index 0000000000000..b241006372b6a --- /dev/null +++ b/torch/distributed/_C_stubs.py @@ -0,0 +1,150 @@ +# mypy: allow-untyped-defs +""" +Python stubs for backend-specific distributed components. + +Since _C._distributed_c10d always exists now, this module only provides +stubs for backend-specific functionality that may not be available in all builds +(e.g., NCCL, UCC, MPI, Gloo, etc.). 
+""" + +from __future__ import annotations + +from typing import Optional, TYPE_CHECKING + +from torch._C._distributed_c10d import Store + + +if TYPE_CHECKING: + from datetime import timedelta + +import torch + + +# Store classes +class HashStore(Store): + """Stub HashStore for builds without this functionality.""" + + def __init__(self, *args, **kwargs): + self._data = {} + + def set(self, key: str, value: str): + self._data[key] = value + + def get(self, key: str) -> bytes: + return self._data.get(key, "").encode() + + +# Backend-specific process group stubs +class ProcessGroupMPI: + """Stub ProcessGroupMPI for non-MPI builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class ProcessGroupNCCL: + """Stub ProcessGroupNCCL for non-NCCL builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class ProcessGroupGloo: + """Stub ProcessGroupGloo for non-Gloo builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class ProcessGroupUCC: + """Stub ProcessGroupUCC for non-UCC builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class ProcessGroupXCCL: + """Stub ProcessGroupXCCL for non-XCCL builds.""" + + def __init__(self, *args, **kwargs): + pass + + +class _ProcessGroupWrapper: + """Stub _ProcessGroupWrapper for non-Gloo builds.""" + + def __init__(self, process_group, *args, **kwargs): + self._process_group = process_group + + def __getattr__(self, name): + return getattr(self._process_group, name) + + +# NCCL-specific function stubs +_DEFAULT_PG_NCCL_TIMEOUT: Optional[timedelta] = None + + +def _hash_tensors(tensors): + """Stub function to hash tensors - returns dummy hash.""" + return 0 + + +def _dump_nccl_trace_json( + includeCollectives: Optional[bool] = None, onlyActive: Optional[bool] = None +) -> bytes: + """Stub function that returns empty JSON trace.""" + return b"{}" + + +def _dump_nccl_trace( + includeCollectives: Optional[bool] = None, + includeStackTraces: Optional[bool] = None, + onlyActive: Optional[bool] = None, +) -> bytes: + """Stub function that returns empty pickle trace.""" + return b"" + + +# NVSHMEM/SymmetricMemory stubs +def _is_nvshmem_available() -> bool: + """Stub function that returns False indicating NVSHMEM is not available.""" + return False + + +def _nvshmemx_cumodule_init(module: int) -> None: + """Stub function for NVSHMEM CU module initialization.""" + + +class _SymmetricMemory: + """Stub _SymmetricMemory class for builds without this functionality.""" + + def __init__(self, *args, **kwargs): + pass + + @classmethod + def empty_strided_p2p(cls, size, stride, dtype, device, group_name=None): + """Stub that returns a regular tensor.""" + return torch.empty(size, dtype=dtype, device=device) + + @classmethod + def rendezvous(cls, tensor, group_name=None): + """Stub that returns None.""" + return None + + @classmethod + def set_group_info(cls, *args, **kwargs): + """Stub that does nothing.""" + + @classmethod + def set_backend(cls, name): + """Stub that does nothing.""" + + @classmethod + def get_backend(cls, device): + """Stub that returns None.""" + return None + + @classmethod + def has_multicast_support(cls, device_type, device_index): + """Stub that returns False.""" + return False diff --git a/torch/distributed/__init__.py b/torch/distributed/__init__.py index 38e2fdbee803a..836b00c51c3a4 100644 --- a/torch/distributed/__init__.py +++ b/torch/distributed/__init__.py @@ -14,16 +14,10 @@ def is_available() -> bool: """ - Return ``True`` if the distributed package is available. 
- - Otherwise, - ``torch.distributed`` does not expose any other APIs. Currently, - ``torch.distributed`` is available on Linux, MacOS and Windows. Set - ``USE_DISTRIBUTED=1`` to enable it when building PyTorch from source. - Currently, the default value is ``USE_DISTRIBUTED=1`` for Linux and Windows, - ``USE_DISTRIBUTED=0`` for MacOS. + Always returns ``True``. Note that even if distributed is available, + there may not necessarily be any usable backends. """ - return hasattr(torch._C, "_c10d_init") + return True if is_available() and not torch._C._c10d_init(): @@ -36,132 +30,124 @@ def is_available() -> bool: DistStoreError = torch._C._DistStoreError QueueEmptyError = torch._C._DistQueueEmptyError -if is_available(): - from torch._C._distributed_c10d import ( - _broadcast_coalesced, - _compute_bucket_assignment_by_size, - _ControlCollectives, - _DEFAULT_FIRST_BUCKET_BYTES, - _make_nccl_premul_sum, - _register_builtin_comm_hook, - _register_comm_hook, - _StoreCollectives, - _test_python_store, - _verify_params_across_processes, - Backend as _Backend, - BuiltinCommHookType, - DebugLevel, - FileStore, - get_debug_level, - GradBucket, - Logger, - PrefixStore, - ProcessGroup as ProcessGroup, - Reducer, - set_debug_level, - set_debug_level_from_env, - Store, - TCPStore, - Work as _Work, - ) - - class _DistributedPdb(pdb.Pdb): - """ - Supports using PDB from inside a multiprocessing child process. - - Usage: - _DistributedPdb().set_trace() - """ - - def interaction(self, *args, **kwargs): - _stdin = sys.stdin - try: - sys.stdin = open("/dev/stdin") - pdb.Pdb.interaction(self, *args, **kwargs) - finally: - sys.stdin = _stdin - - _breakpoint_cache: dict[int, typing.Any] = {} - - def breakpoint(rank: int = 0, skip: int = 0, timeout_s=3600): - """ - Set a breakpoint, but only on a single rank. All other ranks will wait for you to be - done with the breakpoint before continuing. - - Args: - rank (int): Which rank to break on. Default: ``0`` - skip (int): Skip the first ``skip`` calls to this breakpoint. Default: ``0``. - """ - if skip > 0: - key = hash(str(traceback.format_exc())) - counter = _breakpoint_cache.get(key, 0) + 1 - _breakpoint_cache[key] = counter - if counter <= skip: - log.warning("Skip the breakpoint, counter=%d", counter) - return - - # avoid having the default timeout (if short) interrupt your debug session - if timeout_s is not None: - for group in torch.distributed.distributed_c10d._pg_map: - torch.distributed.distributed_c10d._set_pg_timeout( - timedelta(seconds=timeout_s), group - ) - - if get_rank() == rank: - pdb = _DistributedPdb() - pdb.message( - "\n!!! ATTENTION !!!\n\n" - f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n" - ) - pdb.set_trace() - # If Meta/Python keys are in the TLS, we want to make sure that we ignore them - # and hit the (default) CPU/CUDA implementation of barrier. 
- meta_in_tls = torch._C._meta_in_tls_dispatch_include() - guard = torch._C._DisableTorchDispatch() # type: ignore[attr-defined] - torch._C._set_meta_in_tls_dispatch_include(False) +from torch.distributed._distributed_c10d import ( + _broadcast_coalesced, + _compute_bucket_assignment_by_size, + _ControlCollectives, + _DEFAULT_FIRST_BUCKET_BYTES, + _make_nccl_premul_sum, + _register_builtin_comm_hook, + _register_comm_hook, + _StoreCollectives, + _test_python_store, + _verify_params_across_processes, + Backend as _Backend, + BuiltinCommHookType, + DebugLevel, + FileStore, + get_debug_level, + GradBucket, + Logger, + PrefixStore, + ProcessGroup as ProcessGroup, + Reducer, + set_debug_level, + set_debug_level_from_env, + Store, + TCPStore, + Work as _Work, +) + + +class _DistributedPdb(pdb.Pdb): + """ + Supports using PDB from inside a multiprocessing child process. + + Usage: + _DistributedPdb().set_trace() + """ + + def interaction(self, *args, **kwargs): + _stdin = sys.stdin try: - barrier() + sys.stdin = open("/dev/stdin") + pdb.Pdb.interaction(self, *args, **kwargs) finally: - torch._C._set_meta_in_tls_dispatch_include(meta_in_tls) - del guard - - if sys.platform != "win32": - from torch._C._distributed_c10d import HashStore - - from .device_mesh import DeviceMesh, init_device_mesh - - # Variables prefixed with underscore are not auto imported - # See the comment in `distributed_c10d.py` above `_backend` on why we expose - # this. - from .distributed_c10d import * # noqa: F403 - from .distributed_c10d import ( - _all_gather_base, - _coalescing_manager, - _CoalescingManager, - _create_process_group_wrapper, - _get_process_group_name, - _rank_not_in_group, - _reduce_scatter_base, - _time_estimator, - get_node_local_rank, - ) - from .remote_device import _remote_device - from .rendezvous import ( - _create_store_from_options, - register_rendezvous_handler, - rendezvous, - ) - - set_debug_level_from_env() - -else: - # This stub is sufficient to get - # python test/test_public_bindings.py -k test_correct_module_names - # working even when USE_DISTRIBUTED=0. Feel free to add more - # stubs as necessary. - # We cannot define stubs directly because they confuse pyre - - class _ProcessGroupStub: - pass - - sys.modules["torch.distributed"].ProcessGroup = _ProcessGroupStub # type: ignore[attr-defined] + sys.stdin = _stdin + + +_breakpoint_cache: dict[int, typing.Any] = {} + + +def breakpoint(rank: int = 0, skip: int = 0, timeout_s=3600): + """ + Set a breakpoint, but only on a single rank. All other ranks will wait for you to be + done with the breakpoint before continuing. + + Args: + rank (int): Which rank to break on. Default: ``0`` + skip (int): Skip the first ``skip`` calls to this breakpoint. Default: ``0``. + """ + if skip > 0: + key = hash(str(traceback.format_exc())) + counter = _breakpoint_cache.get(key, 0) + 1 + _breakpoint_cache[key] = counter + if counter <= skip: + log.warning("Skip the breakpoint, counter=%d", counter) + return + + # avoid having the default timeout (if short) interrupt your debug session + if timeout_s is not None: + for group in torch.distributed.distributed_c10d._pg_map: + torch.distributed.distributed_c10d._set_pg_timeout( + timedelta(seconds=timeout_s), group + ) + + if get_rank() == rank: + pdb = _DistributedPdb() + pdb.message( + "\n!!! 
ATTENTION !!!\n\n" + f"Type 'up' to get to the frame that called dist.breakpoint(rank={rank})\n" + ) + pdb.set_trace() + # If Meta/Python keys are in the TLS, we want to make sure that we ignore them + # and hit the (default) CPU/CUDA implementation of barrier. + meta_in_tls = torch._C._meta_in_tls_dispatch_include() + guard = torch._C._DisableTorchDispatch() # type: ignore[attr-defined] + torch._C._set_meta_in_tls_dispatch_include(False) + try: + barrier() + finally: + torch._C._set_meta_in_tls_dispatch_include(meta_in_tls) + del guard + + +if sys.platform != "win32": + from torch.distributed._distributed_c10d import HashStore + +from .device_mesh import DeviceMesh, init_device_mesh + +# Variables prefixed with underscore are not auto imported +# See the comment in `distributed_c10d.py` above `_backend` on why we expose +# this. +from .distributed_c10d import * # noqa: F403 +from .distributed_c10d import ( + _all_gather_base, + _coalescing_manager, + _CoalescingManager, + _create_process_group_wrapper, + _get_process_group_name, + _rank_not_in_group, + _reduce_scatter_base, + _time_estimator, + get_node_local_rank, +) +from .remote_device import _remote_device +from .rendezvous import ( + _create_store_from_options, + register_rendezvous_handler, + rendezvous, +) + + +set_debug_level_from_env() diff --git a/torch/distributed/_dist2.py b/torch/distributed/_dist2.py index ce5cb8d7e0cc3..1c27bf55d6834 100644 --- a/torch/distributed/_dist2.py +++ b/torch/distributed/_dist2.py @@ -10,7 +10,7 @@ from typing import Protocol, Union import torch -from torch._C._distributed_c10d import ( +from torch.distributed._distributed_c10d import ( _current_process_group, _set_process_group, ProcessGroup, diff --git a/torch/distributed/_distributed_c10d.py b/torch/distributed/_distributed_c10d.py new file mode 100644 index 0000000000000..beb7830edc1da --- /dev/null +++ b/torch/distributed/_distributed_c10d.py @@ -0,0 +1,245 @@ +# mypy: disable-error-code="assignment" +# noqa: F401 +""" +Centralized module for importing and re-exporting torch._C._distributed_c10d components. + +IMPORTANT PATTERN: +Never access torch._C._distributed_c10d directly in code. Always import from and use +torch.distributed._distributed_c10d which is guaranteed to have all functions available. 
+ +Example: + # WRONG: torch._C._distributed_c10d._set_global_rank(rank) + # RIGHT: + from torch.distributed._distributed_c10d import _set_global_rank + _set_global_rank(rank) +""" + +from typing import TYPE_CHECKING + +# Import all core distributed components from the C extension +# NB: This list has to be spelled out because the _C module doesn't have __all__ +from torch._C._distributed_c10d import ( + _allow_inflight_collective_as_graph_input, + _broadcast_coalesced, + _compute_bucket_assignment_by_size, + _ControlCollectives, + _current_process_group, + _DEFAULT_FIRST_BUCKET_BYTES, + _DEFAULT_PG_TIMEOUT, + _DistributedBackendOptions, + _make_nccl_premul_sum, + _register_builtin_comm_hook, + _register_comm_hook, + _register_process_group, + _register_work, + _resolve_process_group, + _set_allow_inflight_collective_as_graph_input, + _set_global_rank, + _set_process_group, + _StoreCollectives, + _test_python_store, + _unregister_all_process_groups, + _unregister_process_group, + _verify_params_across_processes, + _WorkerServer, + AllgatherOptions, + AllreduceCoalescedOptions, + AllreduceOptions, + AllToAllOptions, + Backend, + BarrierOptions, + BroadcastOptions, + BuiltinCommHookType, + DebugLevel, + FakeProcessGroup, + FakeWork, + FileStore, + GatherOptions, + get_debug_level, + GradBucket, + Logger, + PrefixStore, + ProcessGroup, + ReduceOp, + ReduceOptions, + Reducer, + ReduceScatterOptions, + ScatterOptions, + set_debug_level, + set_debug_level_from_env, + Store, + TCPStore, + Work, +) + + +# Backend-specific components that may not be available +_MPI_AVAILABLE = False +_NCCL_AVAILABLE = False +_GLOO_AVAILABLE = False +_UCC_AVAILABLE = False +_XCCL_AVAILABLE = False + +# HashStore +try: + from torch._C._distributed_c10d import HashStore +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import HashStore + +# NVSHMEM/SymmetricMemory components + +# There are multiple backends for SymmetricMemory, as a result, +# _SymmetricMemory should not be imported together with NVSHMEM related modules. 
+try: + from torch._C._distributed_c10d import _SymmetricMemory +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import _SymmetricMemory + +try: + from torch._C._distributed_c10d import ( + _is_nvshmem_available, + _nvshmemx_cumodule_init, + ) +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ( + _is_nvshmem_available, + _nvshmemx_cumodule_init, + ) + +# MPI backend +try: + from torch._C._distributed_c10d import ProcessGroupMPI + + _MPI_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ProcessGroupMPI + +# NCCL backend +try: + from torch._C._distributed_c10d import ( + _DEFAULT_PG_NCCL_TIMEOUT, + _dump_nccl_trace, + _dump_nccl_trace_json, + _hash_tensors, + ProcessGroupNCCL, + ) + + _NCCL_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ( + _DEFAULT_PG_NCCL_TIMEOUT, + _dump_nccl_trace, + _dump_nccl_trace_json, + _hash_tensors, + ProcessGroupNCCL, + ) + +# Gloo backend +try: + from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo + + _GLOO_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import _ProcessGroupWrapper, ProcessGroupGloo + +# UCC backend +try: + from torch._C._distributed_c10d import ProcessGroupUCC + + _UCC_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ProcessGroupUCC + +# XCCL backend +try: + from torch._C._distributed_c10d import ProcessGroupXCCL + + _XCCL_AVAILABLE = True +except ImportError: + if not TYPE_CHECKING: + from torch.distributed._C_stubs import ProcessGroupXCCL + +# Provide backwards compatibility by making all symbols available at module level +__all__ = [ + # Basic components + "_broadcast_coalesced", + "_compute_bucket_assignment_by_size", + "_ControlCollectives", + "_DEFAULT_FIRST_BUCKET_BYTES", + "_DEFAULT_PG_TIMEOUT", + "_DEFAULT_PG_NCCL_TIMEOUT", + "_make_nccl_premul_sum", + "_register_builtin_comm_hook", + "_register_comm_hook", + "_StoreCollectives", + "_test_python_store", + "_verify_params_across_processes", + "_allow_inflight_collective_as_graph_input", + "_register_work", + "_set_allow_inflight_collective_as_graph_input", + "_is_nvshmem_available", + "_nvshmemx_cumodule_init", + "_SymmetricMemory", + "_hash_tensors", + "_set_global_rank", + "_dump_nccl_trace", + "_dump_nccl_trace_json", + "Backend", + "BuiltinCommHookType", + "DebugLevel", + "FakeProcessGroup", + "FileStore", + "get_debug_level", + "GradBucket", + "HashStore", + "Logger", + "PrefixStore", + "ProcessGroup", + "Reducer", + "ReduceOp", + "set_debug_level", + "set_debug_level_from_env", + "Store", + "TCPStore", + "Work", + "FakeWork", + # Additional distributed_c10d components + "_DistributedBackendOptions", + "_register_process_group", + "_resolve_process_group", + "_unregister_all_process_groups", + "_unregister_process_group", + "_current_process_group", + "_set_process_group", + "_WorkerServer", + "AllgatherOptions", + "AllreduceCoalescedOptions", + "AllreduceOptions", + "AllToAllOptions", + "BarrierOptions", + "BroadcastOptions", + "GatherOptions", + "ReduceOptions", + "ReduceScatterOptions", + "ScatterOptions", + # Process group implementations + "ProcessGroupMPI", + "ProcessGroupNCCL", + "ProcessGroupGloo", + "ProcessGroupUCC", + "ProcessGroupXCCL", + "_ProcessGroupWrapper", + # Availability flags + "_MPI_AVAILABLE", + "_NCCL_AVAILABLE", + "_GLOO_AVAILABLE", + "_UCC_AVAILABLE", + 
"_XCCL_AVAILABLE", +] diff --git a/torch/distributed/_functional_collectives.py b/torch/distributed/_functional_collectives.py index c893794fc3011..95feb6cd79714 100644 --- a/torch/distributed/_functional_collectives.py +++ b/torch/distributed/_functional_collectives.py @@ -7,6 +7,10 @@ import torch import torch.distributed as dist import torch.distributed.distributed_c10d as c10d +from torch.distributed._distributed_c10d import ( + _allow_inflight_collective_as_graph_input, + _set_allow_inflight_collective_as_graph_input, +) from torch.distributed.device_mesh import DeviceMesh from torch.fx.experimental.proxy_tensor import get_proxy_mode @@ -858,15 +862,13 @@ def all_reduce_wait_compiled(y): will be registered in the work registry, and the wait_tensor() in compiled region called on the output tensor of the collective will wait on the correct work object. """ - previous = torch._C._distributed_c10d._allow_inflight_collective_as_graph_input() + previous = _allow_inflight_collective_as_graph_input() try: - torch._C._distributed_c10d._set_allow_inflight_collective_as_graph_input(value) + _set_allow_inflight_collective_as_graph_input(value) yield finally: - torch._C._distributed_c10d._set_allow_inflight_collective_as_graph_input( - previous - ) + _set_allow_inflight_collective_as_graph_input(previous) def _make_all_gather_out_tensor(input, group_size): diff --git a/torch/distributed/_shard/sharded_tensor/reshard.py b/torch/distributed/_shard/sharded_tensor/reshard.py index daef9c3586184..2bc3d65e5c8cb 100644 --- a/torch/distributed/_shard/sharded_tensor/reshard.py +++ b/torch/distributed/_shard/sharded_tensor/reshard.py @@ -4,7 +4,7 @@ import torch import torch.distributed as dist import torch.distributed._shard.sharding_spec as shard_spec -from torch._C._distributed_c10d import ProcessGroup +from torch.distributed._distributed_c10d import ProcessGroup from torch.distributed._shard.metadata import ShardMetadata from torch.distributed._shard.sharding_spec._internals import ( get_chunked_dim_size, diff --git a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py index 61808d0adf62a..f02563619d2fa 100644 --- a/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py +++ b/torch/distributed/_shard/sharding_spec/chunk_sharding_spec_ops/embedding_bag.py @@ -4,7 +4,7 @@ import torch import torch.distributed as dist -from torch._C._distributed_c10d import ReduceOp +from torch.distributed._distributed_c10d import ReduceOp from torch.distributed._shard.sharded_tensor import ShardedTensor from torch.distributed._shard.sharding_spec import ChunkShardingSpec from torch.distributed._shard.sharding_spec.api import custom_sharding_spec_op diff --git a/torch/distributed/_symmetric_memory/__init__.py b/torch/distributed/_symmetric_memory/__init__.py index 43c2959fdd8d1..8154cd9809139 100644 --- a/torch/distributed/_symmetric_memory/__init__.py +++ b/torch/distributed/_symmetric_memory/__init__.py @@ -15,7 +15,12 @@ import torch.distributed._functional_collectives as funcol import torch.distributed.distributed_c10d as c10d from torch._C._autograd import DeviceType -from torch._C._distributed_c10d import _SymmetricMemory, Work as _Work +from torch.distributed._distributed_c10d import ( + _register_work, + _SymmetricMemory, + ProcessGroup, + Work as _Work, +) _group_name_to_store: dict[str, c10d.Store] = {} @@ -1488,7 +1493,7 @@ def _low_contention_all_gather( src_buf = 
symm_mem.get_buffer(remote_rank, tensor.shape, tensor.dtype) chunks[remote_rank].copy_(src_buf) symm_mem.barrier() - torch._C._distributed_c10d._register_work(output, Work()) + _register_work(output, Work()) return output @@ -1536,7 +1541,7 @@ def _low_contention_reduce_scatter_with_symm_mem_input( ret = ret.mean(dim=0) else: raise ValueError(f"reduce_op ({reduce_op}) is not supported") - torch._C._distributed_c10d._register_work(ret, Work()) + _register_work(ret, Work()) return ret @@ -1571,7 +1576,7 @@ def _low_contention_reduce_scatter_with_workspace( ret = ret.mean(dim=0) else: raise ValueError(f"reduce_op ({reduce_op}) is not supported") - torch._C._distributed_c10d._register_work(ret, Work()) + _register_work(ret, Work()) return ret @@ -1649,7 +1654,6 @@ def _all_to_all_vdev_2d_offset_meta( if TYPE_CHECKING: - from torch._C._distributed_c10d import ProcessGroup from torch.types import _device, _dtype, _int @@ -1727,8 +1731,6 @@ def rendezvous( group (Union[str, :class:`torch.distributed.ProcessGroup`]): The group identifying the participating processes. This can be either a group name or a process group object. """ - from torch._C._distributed_c10d import ProcessGroup - if isinstance(group, str): group_name = group elif isinstance(group, ProcessGroup): @@ -1746,11 +1748,7 @@ def is_nvshmem_available() -> bool: Check if NVSHMEM is available in current build and on current system. """ - try: - from torch._C._distributed_c10d import _is_nvshmem_available - except ImportError: - # Not all builds have NVSHMEM support. - return False + from torch.distributed._distributed_c10d import _is_nvshmem_available # Check if NVSHMEM is available on current system. return _is_nvshmem_available() diff --git a/torch/distributed/_symmetric_memory/_nvshmem_triton.py b/torch/distributed/_symmetric_memory/_nvshmem_triton.py index c543fdffc1c76..7b7828227d7d1 100644 --- a/torch/distributed/_symmetric_memory/_nvshmem_triton.py +++ b/torch/distributed/_symmetric_memory/_nvshmem_triton.py @@ -75,7 +75,7 @@ def enable_triton(lib_dir: Optional[str] = None) -> dict[str, str]: """ import triton - from torch._C._distributed_c10d import _nvshmemx_cumodule_init + from torch.distributed._distributed_c10d import _nvshmemx_cumodule_init if lib_dir is not None: lib_path = os.path.join(lib_dir, "libnvshmem_device.bc") diff --git a/torch/distributed/_tools/fake_collectives.py b/torch/distributed/_tools/fake_collectives.py index 3b201b395334b..b89970ab33480 100644 --- a/torch/distributed/_tools/fake_collectives.py +++ b/torch/distributed/_tools/fake_collectives.py @@ -2,7 +2,9 @@ from typing import Any import torch -from torch._C._distributed_c10d import ( + +# Import centralized distributed components +from torch.distributed._distributed_c10d import ( _resolve_process_group, FakeWork, ProcessGroup, diff --git a/torch/distributed/algorithms/model_averaging/utils.py b/torch/distributed/algorithms/model_averaging/utils.py index fa8cc184eddc5..3e3243002a9c0 100644 --- a/torch/distributed/algorithms/model_averaging/utils.py +++ b/torch/distributed/algorithms/model_averaging/utils.py @@ -5,10 +5,6 @@ import torch import torch.distributed as dist - -# The two imports below are not always available depending on the -# USE_DISTRIBUTED compile flag. Make sure they raise import error -# if we're trying to use them. 
from torch.distributed import group, ProcessGroup diff --git a/torch/distributed/constants.py b/torch/distributed/constants.py index c1e604bc86753..bfa8785218645 100644 --- a/torch/distributed/constants.py +++ b/torch/distributed/constants.py @@ -1,7 +1,11 @@ from datetime import timedelta from typing import Optional -from torch._C._distributed_c10d import _DEFAULT_PG_TIMEOUT +# Import from centralized fallback module - no ImportError handling needed +from torch.distributed._distributed_c10d import ( + _DEFAULT_PG_NCCL_TIMEOUT, + _DEFAULT_PG_TIMEOUT, +) __all__ = ["default_pg_timeout", "default_pg_nccl_timeout"] @@ -16,11 +20,4 @@ # Later, we could consider merging them back together at the c++ layer if we can align on a same value. # (only if TORCH_NCCL_BLOCKING_WAIT or TORCH_NCCL_ASYNC_ERROR_HANDLING is set to 1). -try: - from torch._C._distributed_c10d import _DEFAULT_PG_NCCL_TIMEOUT - - default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT -except ImportError: - # if C++ NCCL support is not compiled, we don't have access to the default nccl value. - # if anyone is actually trying to use nccl in this state, it should error. - default_pg_nccl_timeout = None +default_pg_nccl_timeout: Optional[timedelta] = _DEFAULT_PG_NCCL_TIMEOUT diff --git a/torch/distributed/device_mesh.py b/torch/distributed/device_mesh.py index 3a9363090bf71..6ee9263db8cd4 100644 --- a/torch/distributed/device_mesh.py +++ b/torch/distributed/device_mesh.py @@ -11,35 +11,14 @@ from typing import Optional, TYPE_CHECKING, Union import torch -from torch.distributed import is_available from torch.utils._typing_utils import not_none __all__ = ["init_device_mesh", "DeviceMesh"] -if not is_available(): - import sys - - # We need to create the stubs when distributed is not available. - # Otherwise, we would fail the doc tests (```./.ci/pytorch/docs-test.sh```), - # since it would try to import ``torch.distributed.device_mesh`` or - # ``torch.distributed.init_device_mesh`` but cannot find them. - - class _DeviceMeshStub: - pass - - def _init_device_mesh_stub(): - pass - - sys.modules["torch.distributed.device_mesh"].DeviceMesh = _DeviceMeshStub # type: ignore[attr-defined] - sys.modules[ - "torch.distributed.device_mesh" - ].init_device_mesh = _init_device_mesh_stub # type: ignore[attr-defined] - - -else: - from torch._C._distributed_c10d import Backend as C10dBackend +if True: # just to temporarily avoid reindentation + from torch.distributed._distributed_c10d import Backend as C10dBackend from torch.distributed.distributed_c10d import ( _get_default_group, _resolve_process_group, @@ -534,15 +513,16 @@ def _setup_world_group_and_device(self): # heuristic to set the current cuda/cuda-like device base on num of gpu devices available in each host # NOTE: This device selection would only work for homogeneous hardware. num_devices_per_host = device_handle.device_count() - if ( - world_size > num_devices_per_host - and world_size % num_devices_per_host != 0 - ): - raise RuntimeError( - f"DeviceMesh only support homogeneous hardware, but found " - f"{world_size} ranks and {num_devices_per_host} {self.device_type} devices!" - ) - device_handle.set_device(get_rank() % num_devices_per_host) + if num_devices_per_host: + if ( + world_size > num_devices_per_host + and world_size % num_devices_per_host != 0 + ): + raise RuntimeError( + f"DeviceMesh only support homogeneous hardware, but found " + f"{world_size} ranks and {num_devices_per_host} {self.device_type} devices!" 
+ ) + device_handle.set_device(get_rank() % num_devices_per_host) return _get_default_group() diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 29609404df09b..c81d9c60eb1fe 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -19,13 +19,21 @@ from typing_extensions import deprecated import torch +import torch.distributed._distributed_c10d as _c10d from torch._C import _DistStoreError as DistStoreError -from torch._C._distributed_c10d import ( +from torch._utils_internal import set_pytorch_distributed_envs_from_justknobs +from torch.distributed._distributed_c10d import ( # Process group implementations; Availability flags _DistributedBackendOptions, + _GLOO_AVAILABLE, + _MPI_AVAILABLE, + _NCCL_AVAILABLE, + _ProcessGroupWrapper, _register_process_group, _resolve_process_group, + _UCC_AVAILABLE, _unregister_all_process_groups, _unregister_process_group, + _XCCL_AVAILABLE, AllgatherOptions, AllreduceCoalescedOptions, AllreduceOptions, @@ -37,6 +45,11 @@ get_debug_level, PrefixStore, ProcessGroup, + ProcessGroupGloo, + ProcessGroupMPI, + ProcessGroupNCCL, + ProcessGroupUCC, + ProcessGroupXCCL, ReduceOp, ReduceOptions, ReduceScatterOptions, @@ -44,7 +57,6 @@ Store, Work, ) -from torch._utils_internal import set_pytorch_distributed_envs_from_justknobs from torch.monitor import _WaitCounter from torch.overrides import handle_torch_function, has_torch_function from torch.utils._typing_utils import not_none @@ -131,17 +143,11 @@ "split_group", ] -_MPI_AVAILABLE = True -_NCCL_AVAILABLE = True -_GLOO_AVAILABLE = True -_UCC_AVAILABLE = True -_XCCL_AVAILABLE = True - _pickler = pickle.Pickler _unpickler = pickle.Unpickler -# Change __module__ of all imported types from torch._C._distributed_c10d that are public +# Change __module__ of all imported types from the distributed wrapper that are public def _export_c_types() -> None: _public_types_to_change_module = [ AllreduceCoalescedOptions, @@ -167,45 +173,26 @@ def _export_c_types() -> None: _export_c_types() -try: - from torch._C._distributed_c10d import ProcessGroupMPI - +# Add process groups to __all__ and set their module based on availability +if _MPI_AVAILABLE: ProcessGroupMPI.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupMPI"] -except ImportError: - _MPI_AVAILABLE = False - -try: - from torch._C._distributed_c10d import ProcessGroupNCCL +if _NCCL_AVAILABLE: ProcessGroupNCCL.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupNCCL"] -except ImportError: - _NCCL_AVAILABLE = False - -try: - from torch._C._distributed_c10d import _ProcessGroupWrapper, ProcessGroupGloo +if _GLOO_AVAILABLE: ProcessGroupGloo.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupGloo"] -except ImportError: - _GLOO_AVAILABLE = False - -try: - from torch._C._distributed_c10d import ProcessGroupUCC +if _UCC_AVAILABLE: ProcessGroupUCC.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupUCC"] -except ImportError: - _UCC_AVAILABLE = False - -try: - from torch._C._distributed_c10d import ProcessGroupXCCL +if _XCCL_AVAILABLE: ProcessGroupXCCL.__module__ = "torch.distributed.distributed_c10d" __all__ += ["ProcessGroupXCCL"] -except ImportError: - _XCCL_AVAILABLE = False logger = logging.getLogger(__name__) @@ -1325,7 +1312,8 @@ def _get_default_store() -> Store: def _update_default_pg(pg) -> None: _world.default_pg = pg rank = pg.rank() if pg is not None and pg != GroupMember.NON_GROUP_MEMBER 
else -1 - torch._C._distributed_c10d._set_global_rank(rank) + + _c10d._set_global_rank(rank) def get_backend_config(group: Optional[ProcessGroup] = None) -> str: @@ -1962,7 +1950,7 @@ def _new_process_group_helper( if device_id: pg.bound_device_id = device_id - backend_class: torch._C._distributed_c10d.Backend + backend_class: _c10d.Backend for device, backend_str in backend_config.get_device_backend_map().items(): # Use the group name as prefix in the default store, such that # a single store can be reused by multiple groups. @@ -3077,7 +3065,9 @@ def _object_to_tensor(obj, device, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) if backend == Backend.NCCL: - hash = torch._C._distributed_c10d._hash_tensors([byte_tensor]) + from torch.distributed._distributed_c10d import _hash_tensors + + hash = _hash_tensors([byte_tensor]) logger.warning( "_object_to_tensor size: %s hash value: %s", byte_tensor.numel(), @@ -3092,7 +3082,9 @@ def _tensor_to_object(tensor, tensor_size, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) if backend == Backend.NCCL: - hash = torch._C._distributed_c10d._hash_tensors([tensor]) + from torch.distributed._distributed_c10d import _hash_tensors + + hash = _hash_tensors([tensor]) logger.warning( "_tensor_to_object size: %s hash value: %s", tensor.numel(), hash ) @@ -4969,7 +4961,7 @@ def monitored_barrier( def _create_process_group_wrapper( - wrapped_pg: torch._C._distributed_c10d.Backend, + wrapped_pg: _c10d.Backend, store_prefix: str, store: Store, rank: int, diff --git a/torch/distributed/elastic/control_plane.py b/torch/distributed/elastic/control_plane.py index 817255edd23dc..63334a0ca3f62 100644 --- a/torch/distributed/elastic/control_plane.py +++ b/torch/distributed/elastic/control_plane.py @@ -14,7 +14,7 @@ @contextmanager def _worker_server(socket_path: str) -> Generator[None, None, None]: - from torch._C._distributed_c10d import _WorkerServer + from torch.distributed._distributed_c10d import _WorkerServer server = _WorkerServer(socket_path) try: diff --git a/torch/distributed/nn/functional.py b/torch/distributed/nn/functional.py index eeff877260bcc..2bdf3fe2bdffd 100644 --- a/torch/distributed/nn/functional.py +++ b/torch/distributed/nn/functional.py @@ -2,10 +2,6 @@ import torch import torch.distributed as dist from torch.autograd import Function - -# The two imports below are not always available depending on the -# USE_DISTRIBUTED compile flag. Make sure they raise import error -# if we're trying to use them. 
from torch.distributed import group, ReduceOp diff --git a/torch/distributed/rpc/__init__.py b/torch/distributed/rpc/__init__.py index adf901d6b6e3e..27a945a92e44c 100644 --- a/torch/distributed/rpc/__init__.py +++ b/torch/distributed/rpc/__init__.py @@ -37,7 +37,6 @@ def is_available() -> bool: import numbers import torch.distributed.autograd as dist_autograd - from torch._C._distributed_c10d import Store from torch._C._distributed_rpc import ( # noqa: F401 _cleanup_python_rpc_handler, _DEFAULT_INIT_METHOD, @@ -70,6 +69,7 @@ def is_available() -> bool: RpcBackendOptions, WorkerInfo, ) + from torch.distributed._distributed_c10d import Store if _is_tensorpipe_available: from torch._C._distributed_rpc import ( # noqa: F401 diff --git a/torch/distributed/tensor/_collective_utils.py b/torch/distributed/tensor/_collective_utils.py index 4fce6fea538a6..f01836c59592b 100644 --- a/torch/distributed/tensor/_collective_utils.py +++ b/torch/distributed/tensor/_collective_utils.py @@ -8,8 +8,10 @@ import torch import torch.distributed._functional_collectives as funcol import torch.distributed.tensor._dtensor_spec as dtensor_spec -from torch._C._distributed_c10d import _resolve_process_group from torch._logging import warning_once + +# Import from centralized fallback module - no conditional imports needed +from torch.distributed._distributed_c10d import _resolve_process_group from torch.distributed.device_mesh import _mesh_resources, DeviceMesh from torch.distributed.distributed_c10d import ( _get_group_size_by_name, diff --git a/torch/testing/_internal/distributed/fake_pg.py b/torch/testing/_internal/distributed/fake_pg.py index 0a2814c246459..035a8bb7c586d 100644 --- a/torch/testing/_internal/distributed/fake_pg.py +++ b/torch/testing/_internal/distributed/fake_pg.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import torch.distributed as dist -from torch._C._distributed_c10d import FakeProcessGroup +from torch.distributed._distributed_c10d import FakeProcessGroup class FakeStore(dist.Store): From 03798b0f91244d21dd6bc65c364263d809449080 Mon Sep 17 00:00:00 2001 From: Mwiza Kunda Date: Fri, 12 Sep 2025 13:58:09 +0000 Subject: [PATCH 175/693] [inductor] Fix removal of constexpr args from the launcher signature (#161924) Fixes the case described below which occurs when: - A user `torch.compile`s a function that uses a triton kernel. - `TORCHINDUCTOR_DUMP_LAUNCH_PARAMS=1` . Problem: If the user defined triton kernel is not autotuned: ```python import os os.environ["TORCHINDUCTOR_DUMP_LAUNCH_PARAMS"] = "1" @triton.jit def kernel(..., BLOCK_SIZE: tl.constexpr): ... @torch.compile def fn(..) kernel[..](..., 128) fn(..) ``` Then In `triton_heuristics. _interpret_args_grid`, `filter_signature` function: ```python def filtered_signature() -> list[str]: # constexprs are not passed in as args return [ x for x in self.triton_meta["signature"].keys() if x not in cfg.kwargs.keys() ] ``` because `triton.autotune` is not used on the the `triton.jit` function, `cfg` above will be empty, and so `BLOCK_SIZE` will not be removed from the signature even though it is constexpr, even though it is removed from the arguments that are passed in to `interpret_args_grid`. This results in a mismatch between the number of parameters in the signature and the number of arguments, which leads to the error `NameError: name '_grid_2' is not defined`. Fix: Use the triton jit kernel `constexprs` for args to remove. Not sure if this is a good fix so suggestions are welcome. 
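For intuition, a minimal standalone sketch of the filtering rule (here `signature_names`, `constexpr_indices` and `cfg_kwargs` are illustrative stand-ins for `self.triton_meta["signature"]`, `self.fn.constexprs` and `cfg.kwargs`; the actual patch below additionally special-cases `InterpretedFunction`):

```python
# Sketch: drop constexpr parameters from the launcher signature using the JIT
# function's constexpr metadata, so it also works when no autotuner config exists.
def filtered_signature(signature_names, constexpr_indices, cfg_kwargs):
    kept = []
    for i, name in enumerate(signature_names):
        if name in cfg_kwargs:
            continue  # removed via an autotuner config (triton.autotune case)
        if i in constexpr_indices:
            continue  # removed via the @triton.jit function's constexprs
        kept.append(name)
    return kept


# BLOCK_SIZE (index 3) is constexpr but has no config entry; it is still dropped.
print(filtered_signature(["in_ptr", "out_ptr", "n_elements", "BLOCK_SIZE"], {3}, {}))
# ['in_ptr', 'out_ptr', 'n_elements']
```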
Test plan: Added a parameter to an existing triton kernel to test for this edge case Pull Request resolved: https://github.com/pytorch/pytorch/pull/161924 Approved by: https://github.com/davidberard98 --- test/inductor/test_triton_kernels.py | 6 +++++- torch/_inductor/runtime/triton_heuristics.py | 22 +++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/test/inductor/test_triton_kernels.py b/test/inductor/test_triton_kernels.py index 5fe3623b271a5..15a08e7f16271 100644 --- a/test/inductor/test_triton_kernels.py +++ b/test/inductor/test_triton_kernels.py @@ -4,6 +4,7 @@ # Skip do not assign a lambda expression, use a def import functools import logging +import os import torch import torch._dynamo.testing @@ -1280,8 +1281,11 @@ def f(x): self.assertEqual(compiled_out, eager_out) @requires_gpu + @common_utils.parametrize("dump_launch_params", ["0", "1"]) @common_utils.parametrize("dynamic", [False, True]) - def test_triton_kernel_equal_to_1_arg(self, dynamic): + def test_triton_kernel_equal_to_1_arg(self, dynamic, dump_launch_params): + os.environ["TORCHINDUCTOR_DUMP_LAUNCH_PARAMS"] = dump_launch_params + @triton.jit def add_kernel_half_n_elements( in_ptr0, diff --git a/torch/_inductor/runtime/triton_heuristics.py b/torch/_inductor/runtime/triton_heuristics.py index 38a9bd1ad9c05..6d978af8d7721 100644 --- a/torch/_inductor/runtime/triton_heuristics.py +++ b/torch/_inductor/runtime/triton_heuristics.py @@ -1311,11 +1311,23 @@ def _interpret_args_grid( def filtered_signature() -> list[str]: # constexprs are not passed in as args - return [ - x - for x in self.triton_meta["signature"].keys() - if x not in cfg.kwargs.keys() - ] + new_signature: list[str] = [] + from triton.runtime.interpreter import InterpretedFunction + + for i, x in enumerate(self.triton_meta["signature"].keys()): + if isinstance(self.fn, InterpretedFunction): + # These are torch compiled triton kernels that definitely + # have block size configs. Dynamo does not currently + # trace user defined triton kernels when TRITON_INTERPRET=1 + if x not in cfg.kwargs.keys(): + new_signature.append(x) + elif i not in self.fn.constexprs: + # use constexprs rather than just configs since user + # defined triton kernels may not have any configs + new_signature.append(x) + + return new_signature + else: def filtered_signature() -> list[str]: From 7357eb66c5e55c6f023d6889845d0cec26b678f6 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Fri, 12 Sep 2025 15:02:40 +0000 Subject: [PATCH 176/693] [ROCm][CI] unskip some test_memory_format tests (#162766) Fixes #70125. Much of the work was done by #161687. This PR is additional test cleanup. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162766 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily --- test/test_modules.py | 8 ++++++- torch/testing/_internal/common_modules.py | 29 +---------------------- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/test/test_modules.py b/test/test_modules.py index 86e780dd6eedc..e587c67815c72 100644 --- a/test/test_modules.py +++ b/test/test_modules.py @@ -15,10 +15,16 @@ from torch.testing._internal.common_modules import module_db, modules, ModuleErrorEnum, TrainEvalMode from torch.testing._internal.common_utils import ( TestCase, run_tests, freeze_rng_state, mock_wrapper, get_tensors_from, gradcheck, - gradgradcheck, parametrize, wrapSwapTensorsTest) + gradgradcheck, parametrize, wrapSwapTensorsTest, TEST_WITH_ROCM) from unittest.mock import patch, call +if TEST_WITH_ROCM: + import os + os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC"] = "1" + os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC_BATCHNORM"] = "1" + + class TestModule(TestCase): _do_cuda_memory_leak_check = True _do_cuda_non_default_stream = True diff --git a/torch/testing/_internal/common_modules.py b/torch/testing/_internal/common_modules.py index edb897b6f99a5..3296d3da804e2 100644 --- a/torch/testing/_internal/common_modules.py +++ b/torch/testing/_internal/common_modules.py @@ -16,7 +16,7 @@ floating_types, floating_and_complex_types_and, get_all_fp_dtypes) from torch.testing._internal.common_device_type import ( _TestParametrizer, _update_param_kwargs, expectedFailureMPS, toleranceOverride, tol, - skipCUDAIfRocm, precisionOverride, skipMeta, skipMPS) + precisionOverride, skipMeta, skipMPS) from torch.testing._internal.common_methods_invocations import DecorateInfo from torch.testing._internal.common_nn import ( cosineembeddingloss_reference, cross_entropy_loss_reference, ctcloss_reference, @@ -3497,8 +3497,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # See #119108: MPSNDArrayConvolutionA14.mm:3976: failed assertion `destination datatype must be fp32' # xfail does not work due to Fatal Python error: Aborted DecorateInfo(skipIfMPSOnMacOS13, "TestModule", "test_memory_format", @@ -3514,8 +3512,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # This was wrongly being skipped before and needs investigation. # See https://github.com/pytorch/pytorch/issues/80247 DecorateInfo(unittest.expectedFailure, "TestModule", "test_memory_format", @@ -3538,8 +3534,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Conv3d is not supported on MPS backend DecorateInfo(skipMPS, device_type="mps"), # This was wrongly being skipped before and needs investigation. 
@@ -3555,8 +3549,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad module_memformat_affects_out=True, dtypes=floating_and_complex_types_and(torch.chalf), skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Not implemented for chalf on CPU DecorateInfo(unittest.expectedFailure, 'TestModule', 'test_cpu_gpu_parity', dtypes=(torch.chalf,), device_type='cuda'), @@ -3576,8 +3568,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad module_memformat_affects_out=True, dtypes=floating_and_complex_types_and(torch.chalf), skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Fails on backward check because ViewAsRealBackward apply contiguous for grad DecorateInfo(unittest.expectedFailure, 'TestModule', 'test_memory_format', dtypes=(torch.complex32, torch.complex64, torch.complex128)), @@ -3608,16 +3598,11 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # ConvTranspose3d is not supported on MPS backend DecorateInfo(skipMPS), # This was wrongly being skipped before and needs investigation. # See https://github.com/pytorch/pytorch/issues/80247 DecorateInfo(unittest.expectedFailure, "TestModule", "test_memory_format"), - # These fail only on ROCm - DecorateInfo(unittest.expectedFailure, "TestModule", "test_memory_format", device_type='cuda', - dtypes=[torch.complex32, torch.complex64], active_if=TEST_WITH_ROCM), # Not implemented for chalf on CPU DecorateInfo(unittest.expectedFailure, 'TestModule', 'test_cpu_gpu_parity', dtypes=(torch.chalf,), device_type='cuda'), @@ -3677,8 +3662,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Lazy modules don't currently play well with ModuleInfo tests on the meta device. # See https://github.com/pytorch/pytorch/issues/70505 for more info. DecorateInfo(skipMeta), @@ -3697,8 +3680,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Lazy modules don't currently play well with ModuleInfo tests on the meta device. # See https://github.com/pytorch/pytorch/issues/70505 for more info. DecorateInfo(skipMeta), @@ -3724,8 +3705,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Lazy modules don't currently play well with ModuleInfo tests on the meta device. # See https://github.com/pytorch/pytorch/issues/70505 for more info. 
DecorateInfo(skipMeta), @@ -3743,8 +3722,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Lazy modules don't currently play well with ModuleInfo tests on the meta device. # See https://github.com/pytorch/pytorch/issues/70505 for more info. DecorateInfo(skipMeta), @@ -3763,8 +3740,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Lazy modules don't currently play well with ModuleInfo tests on the meta device. # See https://github.com/pytorch/pytorch/issues/70505 for more info. DecorateInfo(skipMeta), @@ -3790,8 +3765,6 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad gradcheck_nondet_tol=GRADCHECK_NONDET_TOL, module_memformat_affects_out=True, skips=( - # Failure on ROCM for float32 issue #70125 - DecorateInfo(skipCUDAIfRocm, 'TestModule', 'test_memory_format', dtypes=[torch.float32]), # Lazy modules don't currently play well with ModuleInfo tests on the meta device. # See https://github.com/pytorch/pytorch/issues/70505 for more info. DecorateInfo(skipMeta), From 1e9ddf510f5f3e25db3e2a725159c41cf7cb982e Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Fri, 12 Sep 2025 15:07:13 +0000 Subject: [PATCH 177/693] [ROCm] fix hardsigmoid op (#162758) Currently std::min -> ::min did not work as expected on ROCm when input values >= 2147483648 It can be fixed by explicit typing std::min Pull Request resolved: https://github.com/pytorch/pytorch/pull/162758 Approved by: https://github.com/jeffdaily, https://github.com/pruthvistony Co-authored-by: Jeff Daily --- aten/src/ATen/native/cuda/ActivationHardsigmoidKernel.cu | 2 +- torch/testing/_internal/common_methods_invocations.py | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/aten/src/ATen/native/cuda/ActivationHardsigmoidKernel.cu b/aten/src/ATen/native/cuda/ActivationHardsigmoidKernel.cu index 8a3326fddb8a9..fcacef37ceaf0 100644 --- a/aten/src/ATen/native/cuda/ActivationHardsigmoidKernel.cu +++ b/aten/src/ATen/native/cuda/ActivationHardsigmoidKernel.cu @@ -36,7 +36,7 @@ void hardsigmoid_kernel(TensorIteratorBase& iter) { [zero, one_sixth, three, six] GPU_LAMBDA( scalar_t self_val) -> scalar_t { opmath_t x = static_cast(self_val); - return std::min(std::max(x + three, zero), six) * one_sixth; + return std::min(std::max(x + three, zero), six) * one_sixth; }); }); } diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 4c2c3e023031f..80f539455fce2 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -16593,12 +16593,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): toleranceOverride({torch.float16: tol(atol=1e-04, rtol=0.001)}), 'TestUnaryUfuncs', device_type='cuda',), ], skips=[ # still want to test that first derivative works though second derivative isn't supported - DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', "test_inplace_gradgrad"), - # produces 0 instead of nan on ROCM - 
DecorateInfo(unittest.expectedFailure, - 'TestUnaryUfuncs', "test_reference_numerics_extremal", - device_type='cuda', - active_if=(TEST_WITH_ROCM)), ] + DecorateInfo(unittest.expectedFailure, 'TestBwdGradients', "test_inplace_gradgrad")] ), UnaryUfuncInfo( 'nn.functional.logsigmoid', From e15686b40d8b961da4e521b26afca5d23daed710 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Fri, 12 Sep 2025 15:36:50 +0000 Subject: [PATCH 178/693] Remove actionable label from docathon label sync script (#155713) Make sure we don't propagate actionable label in docathon sync label script. Pull Request resolved: https://github.com/pytorch/pytorch/pull/155713 Approved by: https://github.com/clee2000 --- .github/scripts/docathon-label-sync.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/scripts/docathon-label-sync.py b/.github/scripts/docathon-label-sync.py index ccd2eb0f4bd0f..04f4707a55c3f 100644 --- a/.github/scripts/docathon-label-sync.py +++ b/.github/scripts/docathon-label-sync.py @@ -39,7 +39,9 @@ def main() -> None: pull_request_label_names = [label.name for label in pull_request_labels] issue_label_names = [label.name for label in issue_labels] labels_to_add = [ - label for label in issue_label_names if label not in pull_request_label_names + label + for label in issue_label_names + if label not in pull_request_label_names and label != "actionable" ] if not labels_to_add: print("The pull request already has the same labels.") From a0dca0fc60fd9d5ba9be9b89e6d05992efa7b74b Mon Sep 17 00:00:00 2001 From: Jeffro <0xjeffro@gmail.com> Date: Fri, 12 Sep 2025 16:26:54 +0000 Subject: [PATCH 179/693] Fix protobuf test comparison by parsing proto instead of raw strings (#162644) The tests were comparing raw exported strings for protobuf comparison, which is not backward/forward compatible with different versions of protobuf. This PR parses the strings into protobuf and compares the protobufs directly, similar to what we did in assertImageProto. Our test failed because we used a different version of protobuf, which output 44100.0 instead of 44100, which resulted in an error. However, they are equal, but only different in the exported strings. 
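A small standalone illustration of the idea using a stock protobuf message type (`FloatValue` stands in for the TensorBoard `Summary` proto used by the test):

```python
# Sketch: compare parsed protobuf messages rather than their text serializations,
# so formatting differences across protobuf versions ("44100" vs "44100.0") don't matter.
from google.protobuf import text_format
from google.protobuf.wrappers_pb2 import FloatValue  # stand-in for Summary

expected_str = "value: 44100"        # text written by one protobuf version
actual = FloatValue(value=44100)     # another version may render this as 44100.0

expected = FloatValue()
text_format.Parse(expected_str, expected)
assert expected == actual            # message equality is stable across versions
```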
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162644 Approved by: https://github.com/justinchuby, https://github.com/Skylion007 --- test/test_tensorboard.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/test_tensorboard.py b/test/test_tensorboard.py index c6982d319d810..cd527db88441b 100644 --- a/test/test_tensorboard.py +++ b/test/test_tensorboard.py @@ -82,13 +82,14 @@ def tearDown(self): if os.path.exists(temp_dir): shutil.rmtree(temp_dir) - def assertProto(self, str_to_compare): + def assertProto(self, actual_proto): if expecttest.ACCEPT: - write_proto(str_to_compare, self) + write_proto(actual_proto, self) return True - expected = read_expected_content(self) - str_to_compare = str(str_to_compare) - self.assertEqual(remove_whitespace(str_to_compare), remove_whitespace(expected)) + expected_str = read_expected_content(self) + expected_proto = Summary() + text_format.Parse(expected_str, expected_proto) + self.assertEqual(actual_proto, expected_proto) def assertImageProto(self, actual_proto): if expecttest.ACCEPT: From d71a6497b7da8bd61e560a5e39bb96971149cc1a Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Fri, 12 Sep 2025 16:34:49 +0000 Subject: [PATCH 180/693] Fix typo in ONNX export error message (#162819) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix another "summit" 😅 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162819 Approved by: https://github.com/cyyever, https://github.com/titaiwangms --- torch/onnx/_internal/exporter/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/onnx/_internal/exporter/_core.py b/torch/onnx/_internal/exporter/_core.py index 33a19d629388d..0cf27560784fa 100644 --- a/torch/onnx/_internal/exporter/_core.py +++ b/torch/onnx/_internal/exporter/_core.py @@ -94,7 +94,7 @@ f"""\ Failed to convert the exported program to an ONNX model. {_BLUE}This is step 3/3{_END} of exporting the model to ONNX. Next steps: - If there is a missing ONNX function, implement it and register it to the registry. - - If there is an internal error during ONNX conversion, debug the error and summit a PR to PyTorch. + - If there is an internal error during ONNX conversion, debug the error and submit a PR to PyTorch. - Create an error report with `torch.onnx.export(..., report=True)`, and save the ExportedProgram as a pt2 file. Create an issue in the PyTorch GitHub repository against the {_BLUE}*onnx*{_END} component. Attach the error report and the pt2 model.""" ) From d89189f28999af6b037c5939edf57836d141ff0e Mon Sep 17 00:00:00 2001 From: Jeffro <0xjeffro@gmail.com> Date: Fri, 12 Sep 2025 16:50:42 +0000 Subject: [PATCH 181/693] Fix inconsistent clock types in `ProcessGroupNCCL::runHookLoop` (#162543) ## Summary This PR fixes an inconsistency in `ProcessGroupNCCL::runHookLoop` when computing `timeStarted`. Both `timeFinished` and `timeStarted` in `WorkInfo` are expected to use `std::chrono::system_clock`, but previously the code was casting a duration from `steady_clock`. Reviewers suggested using `steady_clock` consistently for time measurement since it is appropriate for durations (see #153135 ). This PR updates both `timeStarted` and `timeFinished` in `WorkInfo`, and corresponding code in `runHookLoop`, to use `std::chrono::steady_clock`. 
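For intuition, a Python analogue of the clock-consistency argument (an illustration only, not the C++ code changed here):

```python
# Sketch: durations belong to a monotonic ("steady") clock; wall-clock time can be
# adjusted at runtime, so start/finish stamps and durations should share one clock.
import time

start = time.monotonic()                  # analogous to std::chrono::steady_clock
total = sum(range(1_000_000))             # some work to time
active_duration = time.monotonic() - start

time_finished = time.monotonic()          # same clock as the duration above...
time_started = time_finished - active_duration  # ...so this stamp stays consistent
print(total, active_duration >= 0, time_started <= time_finished)
```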
## Error message: ``` libcxx/include/__memory/allocator_traits.h:302:5: error: no matching function for call to '__construct_at' 302 | std::__construct_at(__p, std::forward<_Args>(__args)...); | ^~~~~~~~~~~~~~~~~~~ libcxx/include/__memory/shared_ptr.h:162:33: note: in instantiation of function template specialization 'std::allocator_traits>::construct>> &, std::chrono::time_point &, std::chrono::duration>, 0>' requested here 162 | allocator_traits<_TpAlloc>::construct(__tmp, __get_elem(), std::forward<_Args>(__args)...); | ^ libcxx/include/__memory/shared_ptr.h:736:51: note: in instantiation of function template specialization 'std::__shared_ptr_emplace>::__shared_ptr_emplace>> &, std::chrono::time_point &, std::chrono::duration>, std::allocator, 0>' requested here 736 | ::new ((void*)std::addressof(*__guard.__get())) _ControlBlock(__a, std::forward<_Args>(__args)...); | ^ libcxx/include/__memory/shared_ptr.h:744:15: note: in instantiation of function template specialization 'std::allocate_shared, c10d::OpType, unsigned long, std::chrono::time_point>> &, std::chrono::time_point &, std::chrono::duration>, 0>' requested here 744 | return std::allocate_shared<_Tp>(allocator<__remove_cv_t<_Tp> >(), std::forward<_Args>(__args)...); | ^ torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:2674:32: note: in instantiation of function template specialization 'std::make_shared>> &, std::chrono::time_point &, std::chrono::duration>, 0>' requested here 2674 | onCompletionHook_(std::make_shared( | ^ libcxx/include/__memory/construct_at.h:44:58: note: candidate template ignored: substitution failure [with _Tp = c10d::WorkInfo, _Args = >> &, std::chrono::time_point &, std::chrono::duration>>]: no matching constructor for initialization of 'c10d::WorkInfo' 43 | template ()) _Tp(std::declval<_Args>()...))> | ~~~ 44 | _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* __construct_at(_Tp* __location, _Args&&... __args) { | ^ 1 error generated. 
``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162543 Approved by: https://github.com/cyyever, https://github.com/Skylion007 --- torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp | 2 +- torch/csrc/distributed/c10d/Work.hpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp index 88782701c6a53..1811404d6663d 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp +++ b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp @@ -2624,7 +2624,7 @@ void ProcessGroupNCCL::runHookLoop() { // Hook might grab GIL, unlock first to prevent deadlock lock.unlock(); - auto timeFinished = std::chrono::system_clock::now(); + auto timeFinished = std::chrono::steady_clock::now(); auto timeStarted = timeFinished + std::chrono::duration_cast( diff --git a/torch/csrc/distributed/c10d/Work.hpp b/torch/csrc/distributed/c10d/Work.hpp index 3b743e36d2a05..9e242d6faf9b2 100644 --- a/torch/csrc/distributed/c10d/Work.hpp +++ b/torch/csrc/distributed/c10d/Work.hpp @@ -166,8 +166,8 @@ struct TORCH_API WorkInfo { WorkInfo( const OpType& opType, const uint64_t seq, - const std::chrono::time_point& timeStarted, - const std::chrono::time_point& timeFinished, + const std::chrono::time_point& timeStarted, + const std::chrono::time_point& timeFinished, const std::chrono::duration& activeDuration) : opType(opType), seq(seq), @@ -177,8 +177,8 @@ struct TORCH_API WorkInfo { OpType opType; uint64_t seq; - std::chrono::time_point timeStarted; - std::chrono::time_point timeFinished; + std::chrono::time_point timeStarted; + std::chrono::time_point timeFinished; std::chrono::duration activeDuration; }; From b5f4a7dc141471f6b6c3c012b4a6f468f91834f4 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Fri, 12 Sep 2025 16:57:09 +0000 Subject: [PATCH 182/693] Revert "[DeviceMesh] Make CuTe layout as mesh layout to be ready for using in DeviceMesh (#162414)" This reverts commit 195ac549d7d6538c4212ca73f69488e990b9527d. Reverted https://github.com/pytorch/pytorch/pull/162414 on behalf of https://github.com/malfet due to Looks like it broke test_circular_deps on Windows, see https://hud.pytorch.org/hud/pytorch/pytorch/d89189f28999af6b037c5939edf57836d141ff0e/1?per_page=50&name_filter=win-vs2022-cpu ([comment](https://github.com/pytorch/pytorch/pull/162414#issuecomment-3286070938)) --- torch/distributed/_mesh_layout.py | 75 ------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 torch/distributed/_mesh_layout.py diff --git a/torch/distributed/_mesh_layout.py b/torch/distributed/_mesh_layout.py deleted file mode 100644 index 6ada8e388cce1..0000000000000 --- a/torch/distributed/_mesh_layout.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Definition of CuTe inspired Layouts for DeviceMesh internal bookkeeping and functions to manipulate them -""" - -import math -from collections.abc import Iterator -from dataclasses import dataclass -from typing import TypeAlias - -from torch.distributed._pycute import ( - coalesce, - complement, - composition, - flatten, - is_tuple, - Layout, -) - - -NestedIntTuple: TypeAlias = tuple["int | NestedIntTuple", ...] 
- - -@dataclass(frozen=True, init=True) -class _Layout(Layout): - shape: NestedIntTuple - stride: NestedIntTuple - - def __post_init__(self) -> None: - if not is_tuple(self.shape): - raise ValueError(f"shape must be a tuple, got {type(self.shape)}") - if not is_tuple(self.stride): - raise ValueError(f"stride must be a tuple, got {type(self.stride)}") - if len(flatten(self.shape)) != len(flatten(self.stride)): - raise ValueError( - f"sizes {len(flatten(self.shape))} and " - f"strides {len(flatten(self.stride))} must have the same length" - ) - - @property - def sizes(self) -> NestedIntTuple: - return self.shape - - @property - def strides(self) -> NestedIntTuple: - return self.stride - - @property - def sizes_and_strides(self) -> Iterator[tuple[int, int]]: - return zip(flatten(self.shape), flatten(self.stride)) # type: ignore[arg-type] - - def numel(self) -> int: - return math.prod(flatten(self.shape)) - - # operator [] (get-i like tuples) - def __getitem__(self, i: int) -> "_Layout": - size = self.sizes[i] - stride = self.strides[i] - if is_tuple(size) and is_tuple(stride): - return _Layout(size, stride) # type: ignore[arg-type] - elif isinstance(size, int) and isinstance(stride, int): - return _Layout((size,), (stride,)) - else: - raise ValueError("size and stride must be either int or tuple") - - def coalesce(self) -> "_Layout": - layout = coalesce(self) - return _Layout(layout.shape, layout.stride) # type: ignore[arg-type] - - def composition(self, layout: "_Layout") -> "_Layout": - result = composition(self, layout) - return _Layout(result.shape, result.stride) # type: ignore[arg-type] - - def complement(self, max_idx: int) -> "_Layout": - layout = complement(self, max_idx) - return _Layout(layout.shape, layout.stride) # type: ignore[arg-type] From cad052423b244116f5e2e225beefcb2859848dd3 Mon Sep 17 00:00:00 2001 From: David Berard Date: Fri, 12 Sep 2025 09:33:20 -0700 Subject: [PATCH 183/693] [triton] Update 3.5 pin to 5ae38bdb0dc066c5823e34dc9797afb9de42c866 (#162821) Include @aakhundov's sam_fast patch, plus NVIDIA's sm88/sm110 patches (thanks @nWEIdia) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162821 Approved by: https://github.com/atalman --- .ci/docker/ci_commit_pins/triton.txt | 2 +- benchmarks/dynamo/torchbench.yaml | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.ci/docker/ci_commit_pins/triton.txt b/.ci/docker/ci_commit_pins/triton.txt index 1269a7801411f..e543da3aa1613 100644 --- a/.ci/docker/ci_commit_pins/triton.txt +++ b/.ci/docker/ci_commit_pins/triton.txt @@ -1 +1 @@ -70cbcaca84471df49e81ddc56873c9241b671f8d +5ae38bdb0dc066c5823e34dc9797afb9de42c866 diff --git a/benchmarks/dynamo/torchbench.yaml b/benchmarks/dynamo/torchbench.yaml index 6a15cf33222b2..bf0a1b6c31e85 100644 --- a/benchmarks/dynamo/torchbench.yaml +++ b/benchmarks/dynamo/torchbench.yaml @@ -219,9 +219,7 @@ skip: - timm_regnet - timm_nfnet - cuda: - # Temporary until https://github.com/pytorch/pytorch/issues/162282 is fixed - - sam_fast + cuda: [] test: training: From 53b8bdb97774114ca02948fed47f2fd49996c564 Mon Sep 17 00:00:00 2001 From: Isalia20 Date: Fri, 12 Sep 2025 19:07:39 +0000 Subject: [PATCH 184/693] [MPS] enable cat op for sparse (#162007) Enable cat op for sparse on MPS Pull Request resolved: https://github.com/pytorch/pytorch/pull/162007 Approved by: https://github.com/malfet --- aten/src/ATen/native/native_functions.yaml | 2 +- test/test_sparse.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index 3cf6f66468544..e2b8e5ac0b8a5 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -1414,7 +1414,7 @@ - func: cat(Tensor[] tensors, int dim=0) -> Tensor structured_delegate: cat.out dispatch: - SparseCPU, SparseCUDA: cat_sparse + SparseCPU, SparseCUDA, SparseMPS: cat_sparse QuantizedCPU: cat_quantized_cpu NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: cat_nested tags: core diff --git a/test/test_sparse.py b/test/test_sparse.py index d01a51a6a0867..2e197d9546721 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -1121,9 +1121,9 @@ def test_add_sub_nnz(self, device, dtype): x.sub_(2 * x) self.assertLessEqual(x._nnz(), 10) - @expectedFailureMPS @coalescedonoff @dtypes(torch.double, torch.cdouble) + @dtypesIfMPS(torch.float32, torch.complex64) def test_cat(self, device, dtype, coalesced): # shapes: list of tuples (sparse_dims, nnz, sizes) def test_shapes(shapes, dim, fail_message=None): From 38afeb2ba2084cdd5e67b2fcc0576e0b8588e115 Mon Sep 17 00:00:00 2001 From: William Wen Date: Fri, 12 Sep 2025 19:29:47 +0000 Subject: [PATCH 185/693] Fix markdown link syntax in graph breaks index (#162400) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162400 Approved by: https://github.com/Skylion007 --- docs/source/compile/programming_model.graph_breaks_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/compile/programming_model.graph_breaks_index.md b/docs/source/compile/programming_model.graph_breaks_index.md index c46e6012d29a1..096e60838672f 100644 --- a/docs/source/compile/programming_model.graph_breaks_index.md +++ b/docs/source/compile/programming_model.graph_breaks_index.md @@ -1,6 +1,6 @@ # Working with Graph Breaks -As you might remember from (Dynamo Core Concepts)[programming_model.dynamo_core_concepts] that Dynamo performs a graph break when +As you might remember from [Dynamo Core Concepts](programming_model.dynamo_core_concepts) that Dynamo performs a graph break when it encounters code that can't be traced. In the default `torch.compile` settings, Dynamo compiles the FX graph that has been determined up to that point, executes the unsupported code in regular Python, and then resumes tracing. 
From fa4d5e76eaaad08f91bb38a91f8b80afe1f6ccee Mon Sep 17 00:00:00 2001 From: karthickai Date: Thu, 11 Sep 2025 15:05:32 -0700 Subject: [PATCH 186/693] [Inductor] Fix ComboKernels failing due to missing helper functions (#162759) Fixes: #162756 Differential Revision: [D82257359](https://our.internmc.facebook.com/intern/diff/D82257359) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162759 Approved by: https://github.com/eellison, https://github.com/mlazos --- test/inductor/test_combo_kernels.py | 19 +++++++++++++++++++ .../_inductor/codegen/triton_combo_kernel.py | 8 ++++++++ 2 files changed, 27 insertions(+) diff --git a/test/inductor/test_combo_kernels.py b/test/inductor/test_combo_kernels.py index 90399546d26ea..59187c7349a09 100644 --- a/test/inductor/test_combo_kernels.py +++ b/test/inductor/test_combo_kernels.py @@ -6,6 +6,7 @@ import torch import torch._inductor +from torch._inductor.utils import run_and_get_code from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, TestCase, @@ -554,6 +555,24 @@ def fn(x, y, z): self.assertEqual(out_eager, out_compiled) + @requires_cuda_and_triton + def test_helper_fn_defined(self): + def fn(x, y, z): + return x.sum(1), y.mean(1), z.cumsum(1) + + inps = ( + torch.rand(16, 128, device="cuda"), + torch.rand(32, 128, device="cuda"), + torch.rand(32, 256, device="cuda"), + ) + + out_eager = fn(*inps) + fn_c = torch.compile(fn) + out_compiled, code = run_and_get_code(fn_c, *inps) + code = " ".join(code) + self.assertEqual(out_eager, out_compiled) + self.assertEqual(code.count("def _triton_helper_fn_add0(arg0_0, arg1_0):"), 1) + if __name__ == "__main__": from torch._dynamo.test_case import run_tests diff --git a/torch/_inductor/codegen/triton_combo_kernel.py b/torch/_inductor/codegen/triton_combo_kernel.py index e3df5bc0363d2..c28321923c5ea 100644 --- a/torch/_inductor/codegen/triton_combo_kernel.py +++ b/torch/_inductor/codegen/triton_combo_kernel.py @@ -764,6 +764,14 @@ def codegen_kernel(self, name: Optional[str] = None) -> str: if config.benchmark_combo_kernel: code.splice(self.imports_for_benchmark_kernel()) + seen_helpers: OrderedSet[str] = OrderedSet() + for sub_kernel in self.sub_kernels: + for helper in sub_kernel.helper_functions: + if helper not in seen_helpers: + code.writeline("") + code.splice(helper) + seen_helpers.add(helper) + argdefs, _, signature, _ = self.args.python_argdefs() argdefs = self.add_numel_to_args(argdefs, signature) block_args = self.get_block_args() From 65d642d6dbb8e9d8cd8ddceb8f06c3c80a4d8d94 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Fri, 12 Sep 2025 20:05:50 +0000 Subject: [PATCH 187/693] [ROCm] enable aoti tests, forward fix 162353 (#162827) Forward fix for tests added by #162353. Enables aoti tests on rocm. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162827 Approved by: https://github.com/dolpm, https://github.com/huydhn --- caffe2/CMakeLists.txt | 4 +++- test/cpp/nativert/CMakeLists.txt | 2 +- test/cpp/nativert/test_aoti_model_container_registration.cpp | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index b5d47bb4b5dff..51e4023b0d18e 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -551,9 +551,11 @@ if(USE_CUDA OR USE_ROCM) endif() if(USE_CUDA) - # eventually do rocm append_filelist("libtorch_nativert_cuda_sources" Caffe2_GPU_SRCS) endif() +if(USE_ROCM) + append_filelist("libtorch_nativert_cuda_sources" Caffe2_HIP_SRCS) +endif() if(USE_CUDA) list(APPEND Caffe2_GPU_CU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS}) diff --git a/test/cpp/nativert/CMakeLists.txt b/test/cpp/nativert/CMakeLists.txt index 30c6828328249..77636a2d10932 100644 --- a/test/cpp/nativert/CMakeLists.txt +++ b/test/cpp/nativert/CMakeLists.txt @@ -48,7 +48,7 @@ set(NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/oss_proxy_executor.cpp ) -if(USE_CUDA) +if(USE_CUDA OR USE_ROCM) list(APPEND NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/executor/triton/CudaTritonKernelManager.cpp) list(APPEND NATIVERT_TEST_SRCS ${TORCH_ROOT}/torch/nativert/executor/AOTInductorModelContainerCudaShim.cpp) endif() diff --git a/test/cpp/nativert/test_aoti_model_container_registration.cpp b/test/cpp/nativert/test_aoti_model_container_registration.cpp index 94b19bf8e7563..9a5d67a9c3c14 100644 --- a/test/cpp/nativert/test_aoti_model_container_registration.cpp +++ b/test/cpp/nativert/test_aoti_model_container_registration.cpp @@ -8,7 +8,7 @@ using namespace torch::nativert; TEST(AOTIModelContainerRegistrationTests, TestRegister) { EXPECT_TRUE(AOTIModelContainerRunnerRegistry()->Has(at::kCPU)); -#ifdef USE_CUDA +#if defined(USE_CUDA) || defined(USE_ROCM) EXPECT_TRUE(AOTIModelContainerRunnerRegistry()->Has(at::kCUDA)); #else EXPECT_FALSE(AOTIModelContainerRunnerRegistry()->Has(at::kCUDA)); From f7ea4975abb0aeb0224894f0b54b1f8fd1fa70e3 Mon Sep 17 00:00:00 2001 From: LifengWang Date: Fri, 12 Sep 2025 20:53:26 +0000 Subject: [PATCH 188/693] update the baseline data for the operator benchmark (#162693) According to the results of the last four operator benchmark runs, we found that five models achieved more than a 30% improvement compared to the baseline. Therefore, we will update the operator benchmark baseline data. We use the average results from the four runs as the new baseline for the five models. 
And add a pull request trigger for the operator benchmark workflow Benchmarking Framework | Benchmarking Module Name | Case Name | tag | run_backward | baseline old | r1 | r2 | r3 | r4 | avg | speedup -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- PyTorch | add | add_M1_N1_K1_cpu | short | FALSE | 3.9497 | 2.57 | 2.54 | 2.38 | 2.31 | 2.45 | 1.61 PyTorch | functional.hardtanh | functional.hardtanh_dims(512 512)_contigFalse_inplaceFalse_dtypetorch.quint8 | short | FALSE | 67.118 | 50.02 | 49.80 | 46.78 | 48.94 | 48.88 | 1.37 PyTorch | relu6 | relu6_dims(512 512)_contigFalse_inplaceFalse_dtypetorch.quint8 | short | FALSE | 68.739 | 51.17 | 51.19 | 48.07 | 50.42 | 50.21 | 1.37 PyTorch | relu6 | relu6_dims(256 1024)_contigFalse_inplaceFalse_dtypetorch.quint8 | short | FALSE | 69.1875 | 51.97 | 52.77 | 50.00 | 51.24 | 51.50 | 1.34 PyTorch | functional.hardtanh | functional.hardtanh_dims(256 1024)_contigFalse_inplaceFalse_dtypetorch.quint8 | short | FALSE | 67.436 | 50.98 | 51.69 | 49.06 | 49.87 | 50.40 | 1.34 @chuanqi129 @huydhn @desertfire @jainapurva Pull Request resolved: https://github.com/pytorch/pytorch/pull/162693 Approved by: https://github.com/huydhn --- .github/workflows/operator_benchmark.yml | 4 ++++ ...ected_ci_operator_benchmark_eager_float32_cpu.csv | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/operator_benchmark.yml b/.github/workflows/operator_benchmark.yml index dd262d31b8fc2..dcdc2cd0ba24e 100644 --- a/.github/workflows/operator_benchmark.yml +++ b/.github/workflows/operator_benchmark.yml @@ -14,6 +14,10 @@ on: schedule: # Run at 07:00 UTC every Sunday - cron: 0 7 * * 0 + pull_request: + paths: + - benchmarks/operator_benchmark/** + - .github/workflows/operator_benchmark.yml concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} diff --git a/benchmarks/operator_benchmark/expected_ci_operator_benchmark_eager_float32_cpu.csv b/benchmarks/operator_benchmark/expected_ci_operator_benchmark_eager_float32_cpu.csv index 873f14d20127c..9a7b6797e982a 100644 --- a/benchmarks/operator_benchmark/expected_ci_operator_benchmark_eager_float32_cpu.csv +++ b/benchmarks/operator_benchmark/expected_ci_operator_benchmark_eager_float32_cpu.csv @@ -1,5 +1,5 @@ Benchmarking Framework,Benchmarking Module Name,Case Name,tag,run_backward,Execution Time -PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,3.9497 +PyTorch,add,add_M1_N1_K1_cpu,short,FALSE,2.459 PyTorch,add,add_M64_N64_K64_cpu,short,FALSE,14.3181 PyTorch,add,add_M64_N64_K128_cpu,short,FALSE,14.6826 PyTorch,add,add_M1_N1_K1_cpu_bwdall_BACKWARD,short,TRUE,58.1449 @@ -376,10 +376,10 @@ PyTorch,relu6,"relu6_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",sho PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,9.6588 PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,9.5969 PyTorch,relu6,"relu6_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,9.547 -PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,68.739 +PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,50.21375 PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,45.14133333 PyTorch,relu6,"relu6_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,52.6664 
-PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,69.1875 +PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,51.49525 PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,48.3458 PyTorch,relu6,"relu6_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,62.0719 PyTorch,functional.hardtanh,"functional.hardtanh_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,7.5728 @@ -388,10 +388,10 @@ PyTorch,functional.hardtanh,"functional.hardtanh_dims(3,4,5)_contigFalse_inplace PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,8.1647 PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,8.1768 PyTorch,functional.hardtanh,"functional.hardtanh_dims(2,3,4,5)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,8.0619 -PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,67.118 +PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,48.88475 PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,43.702 PyTorch,functional.hardtanh,"functional.hardtanh_dims(512,512)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,50.3613 -PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,67.436 +PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,50.3995 PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint8",short,FALSE,46.9813 PyTorch,functional.hardtanh,"functional.hardtanh_dims(256,1024)_contigFalse_inplaceFalse_dtypetorch.qint32",short,FALSE,59.2295 PyTorch,functional.hardsigmoid,"functional.hardsigmoid_dims(3,4,5)_contigFalse_inplaceFalse_dtypetorch.quint8",short,FALSE,6.5189 @@ -1316,4 +1316,4 @@ PyTorch,where,"where_cond_shape(8,16,1)_input_shape(1,)_other_shape(1,)_cpu_dtyp PyTorch,where,"where_cond_shape(8,16,1)_input_shape(16,1)_other_shape(8,16,1)_cpu_dtypetorch.float32",short,FALSE,5.763 PyTorch,where,"where_cond_shape(8,16,1)_input_shape(8,1,1)_other_shape(1,)_cpu_dtypetorch.float32",short,FALSE,5.744666667 PyTorch,clamp,clamp_M512_N512_cpu,short,FALSE,15.26233333 -PyTorch,gelu,gelu_M512_N512_cpu,short,FALSE,31.33166667 \ No newline at end of file +PyTorch,gelu,gelu_M512_N512_cpu,short,FALSE,31.33166667 From cdb2d1838a6835da97a52f96b4407710c25e6f17 Mon Sep 17 00:00:00 2001 From: Ruben Rodriguez Buchillon Date: Thu, 11 Sep 2025 15:49:27 -0700 Subject: [PATCH 189/693] [inductor] FlexibleLayout for ExternKernelChoice for mms (#161351) # why - if we only use ExternKernelChoice we're not doing any codegen - if we're not doing any codegen, we can use a FlexibleLayout here, and provide deeper passes more chances to change it # what - if all the kernel template choices (KTC) are with a ExternKernelChoice template, we switch to a FlexibleLayout before generating the choice - add a test to make sure that works as intended (FlexibleLayout for only extern, and FixedLayout if Triton is involved) - caveats: - because CPP, CUTLASS, and CK are not using V.choices.get_mm_configs yet, we turn off the optimization if either of those backends are in use. 
This will be relaxed once they support this too - because Triton templates are still using their own calls (not a single call) to get_mm_configs, it's also turned off there. The next diff unifies Triton + ATEN to a single call to get_mm_configs and that in turn allows the optimization there too # testing ``` python3 -bb -m pytest test/inductor/test_max_autotune.py -v ``` Differential Revision: [D81520584](https://our.internmc.facebook.com/intern/diff/D81520584) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161351 Approved by: https://github.com/eellison, https://github.com/jansel --- test/inductor/test_max_autotune.py | 38 +++++++++++- torch/_inductor/choices.py | 98 +++++++++++++++++++++++------- torch/_inductor/config.py | 2 + torch/_inductor/kernel/mm.py | 21 +------ 4 files changed, 116 insertions(+), 43 deletions(-) diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index b0046571e9919..c040f7d1fff4b 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -27,7 +27,7 @@ TuningProcessPool, ) from torch._inductor.graph import GraphLowering -from torch._inductor.ir import Buffer, ChoiceCaller, FixedLayout +from torch._inductor.ir import Buffer, ChoiceCaller, FixedLayout, FlexibleLayout from torch._inductor.kernel.mm_plus_mm import aten_mm_plus_mm from torch._inductor.select_algorithm import ( add_feedback_saver, @@ -1987,6 +1987,42 @@ def choice_validator(choices): finally: clear_preprocessing_fns() + @config.patch( + {"test_configs.max_mm_configs": 4, "max_autotune_gemm_backends": "ATEN,TRITON"} + ) + @parametrize("max_autotune_enabled", (True, False)) + def test_autotune_layout_optimization(self, max_autotune_enabled): + """Test that layouts are flexible when every choice is ExternKernelChoice""" + + # we use a proxy here of bias_addmm and max-autotune because this enables us to see + # multiple choices in both scenarios (bias_addmm, addmm, triton (max-autotune only)) + # and both bias_addmm and addmm are extern kernel choices + def layout_checker(choices): + if choices: + expected_layout = ( + FixedLayout if max_autotune_enabled else FlexibleLayout + ) + for choice in choices: + self.assertIsInstance( + choice.layout, + expected_layout, + f"Expected {expected_layout.__name__} with max_autotune={max_autotune_enabled}", + ) + return choices + + add_preprocessing_fn(layout_checker) + + try: + bias = torch.randn(64, device=GPU_TYPE) + x = torch.randn(32, 128, device=GPU_TYPE) + w = torch.randn(128, 64, device=GPU_TYPE) + + with config.patch({"max_autotune": max_autotune_enabled}): + compiled_fn = torch.compile(lambda b, x, w: torch.addmm(b, x, w)) + _ = compiled_fn(bias, x, w) + finally: + clear_preprocessing_fns(clear_defaults=False) + class TestMaxAutotunePrecompile(TestCase): def test_precompilation_threads(self): diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index a6275ac85c110..2df92c6541258 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -14,6 +14,7 @@ from .metrics import get_metric_table, is_metric_table_enabled from .runtime.hints import DeviceProperties, ReductionHint from .scheduler import BaseSchedulerNode, Scheduler, WhyNoFuse +from .select_algorithm import ExternKernelChoice from .template_heuristics import get_template_heuristic from .template_heuristics.triton import ( BaseConfigHeuristic, @@ -23,6 +24,7 @@ ROCmConfigHeuristic, XPUConfigHeuristic, ) +from .utils import _use_autotune_backend from .virtualized import V @@ -32,14 +34,13 @@ 
from triton import Config as TritonConfig - from torch.utils._ordered_set import OrderedSet - from .codegen.common import KernelTemplate from .codegen.simd_kernel_features import SIMDKernelFeatures from .codegen.triton import TritonKernel - from .ir import ChoiceCaller, Layout + from .ir import ChoiceCaller from .kernel_template_choice import KernelTemplateChoice - from .select_algorithm import ExternKernelChoice + + from torch.utils._ordered_set import OrderedSet # isort: skip class Sortable(typing.Protocol): @@ -109,7 +110,6 @@ def _finalize_mm_configs( self, template_choices: dict[str, Generator[KernelTemplateChoice, None, None]], kernel_inputs: KernelInputs, - layout: Any, templates: list[Union[KernelTemplate, ExternKernelChoice]], op_name: str, kwarg_overrides: Optional[dict[str, dict[str, Any]]] = None, @@ -126,7 +126,6 @@ def _finalize_mm_configs( Args: template_choices: Dictionary mapping template UIDs to generators of KernelTemplateChoice objects kernel_inputs: MMKernelInputs containing input tensor nodes and matrix indices - layout: Output layout templates: List of template objects (KernelTemplate or ExternKernelChoice) in use op_name: Operation name (e.g., "bmm", "baddbmm", "addmm") kwarg_overrides: Optional dict of kwargs to override for each template heuristic @@ -142,7 +141,6 @@ def _finalize_mm_configs( def get_ktc( self, kernel_inputs: KernelInputs, - layout: Layout, template: Union[KernelTemplate, ExternKernelChoice], op_name: str, kwarg_overrides: Optional[dict[str, Any]] = None, @@ -176,16 +174,72 @@ def get_ktc( cs=cs, overrides=overrides, extra_kwargs=extra_kwargs, - layout=layout, + layout=kernel_inputs.output_layout(), inputs=inputs_val, ) + def _need_to_fix_layout( + self, + adjusted_choices: list[KernelTemplateChoice], + op_name: str, + ) -> bool: + """ + Check if we need to fix the layout instead of keeping it flexible + + Args: + ktc: KernelTemplateChoice object + + Returns: + True if we need to fix the layout, False otherwise + """ + # TODO: debug and fix + # NOTE: on mps, we see issues with flexible layouts on baddmm. This check just makes sure + # that for mps, everything stays as it was before this optimization + if len(adjusted_choices) > 0: + if adjusted_choices[0].inputs.device_type == "mps" and op_name not in [ + "mm", + "addmm", + ]: + return True + + # Since the following backends are not using get_mm_configs yet through the singular call, + if not (config.max_autotune or config.max_autotune_gemm): + # no danger of using other backends than ATEN + if not config.max_autotune_allow_flexible_layouts and op_name not in [ + # The historical implementation for mm and addmm allowed had flexible layouts in the + # not max-autotune world + "mm", + "addmm", + ]: + # TODO: deprecate this by migrating users to the new behavior + return True + return False + + if not config.max_autotune_allow_flexible_layouts: + # we always need to fix the layout + return True + + # Since the following backends are not using get_template_configs yet through the singular call, + # we don't know if they are a valid choice or not. Instead, just skip the optimization + # defensively. 
+ # TODO(coconutruben): remove this once TRITON,CPP,CK,CUTLASS are supported + if _use_autotune_backend("TRITON"): + return True + if _use_autotune_backend("CUTLASS"): + return True + if _use_autotune_backend("CK") or _use_autotune_backend("CKTILE"): + return True + if _use_autotune_backend("CPP"): + return True + return any( + not isinstance(ktc.template, ExternKernelChoice) for ktc in adjusted_choices + ) + def get_mm_configs( self, kernel_inputs: KernelInputs, templates: list[Union[KernelTemplate, ExternKernelChoice]], op_name: str, - layout: Optional[Layout] = None, kwarg_overrides: Optional[dict[str, dict[str, Any]]] = None, ) -> list[ChoiceCaller]: """ @@ -206,17 +260,12 @@ def get_mm_configs( input_tensors = kernel_inputs.nodes() if len(input_tensors) < 2: raise ValueError(f"Need at least 2 input tensors, got {len(input_tensors)}") - if layout is None: - # TODO(coconutruben): remove this once we remove the layout argument entirely - # This is just here to the brief gap between commits where we still need this - # to accommodate fixed vs flexible layout decision externally - layout = kernel_inputs.output_layout(flexible=False) + layout = kernel_inputs.output_layout() # First pass: Create dict of template.uid to generator of KernelTemplateChoice objects template_choices = {} for template in templates: template_choices[template.uid] = self.get_ktc( kernel_inputs, - layout, template, op_name, kwarg_overrides.get(template.uid, {}), @@ -226,18 +275,21 @@ def get_mm_configs( adjusted_choices = self._finalize_mm_configs( template_choices, kernel_inputs, - layout, templates, op_name, kwarg_overrides, ) - choices = [] - # Third pass: Get adjusted choices and collect non-None ChoiceCaller objects - for ktc in adjusted_choices: - if ktc.choice is not None: - choices.append(ktc.choice) - - return choices + # Layout optimization: if all choices are ExternKernelChoice and layout is FixedLayout, convert to FlexibleLayout + if self._need_to_fix_layout(adjusted_choices, op_name): + layout = kernel_inputs.output_layout(flexible=False) + for ktc in adjusted_choices: + ktc.layout = layout + # for good measure, delete the cached ChoiceCaller from the ktc if it existed. + # ExternKernelChoice are cheap to generate + if hasattr(ktc, "_choice"): + del ktc._choice + # Third pass: Convert to ChoiceCaller objects + return [ktc.choice for ktc in adjusted_choices if ktc.choice is not None] def triton_kernel_kwargs( self, diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index d08e8c0f02489..d20baa2aaf670 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -465,6 +465,8 @@ def prologue_fusion_enabled() -> bool: == "1" ) +# whether template autotuning should allow flexible layouts if possible (e.g. 
only extern choices) +max_autotune_allow_flexible_layouts: bool = False # force cublas and triton to use the same precision; cublas supports TF32 for matmul operations # when m, n, k are multiples of 16, 16, 8, whereas triton supports TF32 for matmul operations diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 30510042dd1d8..73239596a2903 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -23,7 +23,7 @@ from ..codegen.rocm.ck_tile_universal_gemm_template import CKTileGemmTemplate from ..codegen.rocm.ck_universal_gemm_template import CKGemmTemplate from ..codegen.subgraph import SubgraphChoiceCaller, SubgraphTemplate -from ..ir import Buffer, ChoiceCaller, FlexibleLayout, is_triton, Layout +from ..ir import Buffer, ChoiceCaller, is_triton, Layout from ..kernel_inputs import MMKernelInputs from ..lowering import add_layout_constraint, constrain_to_fx_strides, register_lowering from ..select_algorithm import ( @@ -749,16 +749,9 @@ def tuned_mm(mat1, mat2, *, layout=None): layout, ) - aten_layout = layout - if not (inductor_config.max_autotune or inductor_config.max_autotune_gemm): - aten_layout = FlexibleLayout( - device=layout.device, dtype=layout.dtype, size=layout.size - ) choices: list[ChoiceCaller] = [] if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm", aten_layout) - ) + choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm")) static_shape, is_nonzero = _is_static_problem(layout) if is_nonzero and use_triton_template(layout, check_max_autotune=False): @@ -946,18 +939,9 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): mat2.get_dtype(), layout, ) - aten_layout = layout if (not is_nonzero) or ( not (inductor_config.max_autotune or inductor_config.max_autotune_gemm) ): - # Use a FlexibleLayout if we are not autotuning. - # This allows padding strides for the output. 
- from torch._inductor.ir import FixedLayout, FlexibleLayout - - if isinstance(layout, FixedLayout): - aten_layout = FlexibleLayout( - device=layout.device, dtype=layout.dtype, size=layout.size - ) # TODO(coconutruben): combine this with the main flow of addmm through # a subgraph or something as inp vs inp_expanded causes some slight numeric # differences @@ -969,7 +953,6 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): kernel_inputs, [aten_addmm], name, - aten_layout, ) ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) From a326ef37e6e5adac8792a6a9b5097210611b6dc9 Mon Sep 17 00:00:00 2001 From: Ruben Rodriguez Buchillon Date: Thu, 11 Sep 2025 15:49:28 -0700 Subject: [PATCH 190/693] [inductor] leverage template stacking in V.choices.get_mm_configs (#161350) # why - now everything is in place to just gather templates and run the V.choices.get_mm_configs once per op - enables any overrides inside V.choices.get_mm_configs to have a full view of the options for an op, not just for one template # what - replace multiple calls to V.choices.get_mm_configs with calls to gather the active templates, and then using those in a single call # testing ``` python3 -bb -m pytest test/inductor/test_max_autotune.py -v ``` Differential Revision: [D81520571](https://our.internmc.facebook.com/intern/diff/D81520571) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161350 Approved by: https://github.com/eellison, https://github.com/jansel ghstack dependencies: #161351 --- torch/_inductor/choices.py | 4 +- torch/_inductor/kernel/bmm.py | 47 ++++--- torch/_inductor/kernel/mm.py | 175 +++++++++++---------------- torch/_inductor/kernel/mm_plus_mm.py | 20 +-- 4 files changed, 110 insertions(+), 136 deletions(-) diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index 2df92c6541258..914d1580c6d13 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -222,9 +222,7 @@ def _need_to_fix_layout( # Since the following backends are not using get_template_configs yet through the singular call, # we don't know if they are a valid choice or not. Instead, just skip the optimization # defensively. 
- # TODO(coconutruben): remove this once TRITON,CPP,CK,CUTLASS are supported - if _use_autotune_backend("TRITON"): - return True + # TODO(coconutruben): remove this once CPP,CK,CUTLASS are supported if _use_autotune_backend("CUTLASS"): return True if _use_autotune_backend("CK") or _use_autotune_backend("CKTILE"): diff --git a/torch/_inductor/kernel/bmm.py b/torch/_inductor/kernel/bmm.py index e9867212767eb..734ab96810743 100644 --- a/torch/_inductor/kernel/bmm.py +++ b/torch/_inductor/kernel/bmm.py @@ -1,6 +1,6 @@ # mypy: allow-untyped-defs import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Union import torch from torch._dynamo.utils import counters @@ -28,6 +28,7 @@ if TYPE_CHECKING: from ..ir import ChoiceCaller + from ..select_algorithm import KernelTemplate log = logging.getLogger(__name__) aten = torch.ops.aten @@ -197,21 +198,29 @@ def may_require_contiguous(t, meta_t): aten_extra_kwargs = {"out_dtype": out_dtype} choices: list[ChoiceCaller] = [] + + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] + kwarg_overrides = {} + if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten_handler], - name, - kwarg_overrides={aten_handler.uid: aten_extra_kwargs}, - ) - ) + templates_to_use.append(aten_handler) + kwarg_overrides[aten_handler.uid] = aten_extra_kwargs if use_triton_template(layout, check_max_autotune=False): # TODO: add out_dtype support for Triton Template assert out_dtype is None, "out_dtype is not supported for Triton" - - choices.extend(V.choices.get_mm_configs(kernel_inputs, [bmm_template], name)) + templates_to_use.append(bmm_template) + + # Single unified call for all templates + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + templates_to_use, + name, + kwarg_overrides=kwarg_overrides, + ) + ) _, is_nonzero = _is_static_problem(layout) batch_stride_largest_or_zero = is_batch_stride_largest_or_zero(mat1, mat2, layout) if ( @@ -271,16 +280,16 @@ def tuned_baddbmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): name = "baddbmm" # options to tune from choices: list[ChoiceCaller] = [] + + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_baddbmm], name)) + templates_to_use.append(aten_baddbmm) if use_triton_template(layout, check_max_autotune=False): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [bmm_template], - name, - ) - ) + templates_to_use.append(bmm_template) + + # Single unified call for all templates + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 73239596a2903..fc20286887069 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import functools import logging -from typing import Any, Optional +from typing import Any, Optional, Union import torch from torch._dynamo.utils import counters @@ -29,6 +29,7 @@ from ..select_algorithm import ( autotune_select_algorithm, ExternKernelChoice, + KernelTemplate, realize_inputs, TritonTemplate, ) @@ -750,32 +751,26 @@ def tuned_mm(mat1, mat2, *, layout=None): ) choices: list[ChoiceCaller] = [] - if use_aten_gemm_kernels(): - 
choices.extend(V.choices.get_mm_configs(kernel_inputs, [aten_mm], "mm")) static_shape, is_nonzero = _is_static_problem(layout) - if is_nonzero and use_triton_template(layout, check_max_autotune=False): - # Get template choices using the new unified function - choices.extend(V.choices.get_mm_configs(kernel_inputs, [mm_template], "mm")) + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] + if use_aten_gemm_kernels(): + templates_to_use.append(aten_mm) + + if is_nonzero and use_triton_template(layout, check_max_autotune=True): + templates_to_use.append(mm_template) + if use_triton_tma_template(mat1, mat2): - # Get TMA template choices using the new unified function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, [persistent_tma_mm_template], "mm" - ) - ) + templates_to_use.append(persistent_tma_mm_template) if use_decompose_k_choice(m, n, k): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, [decompose_k_subgraph_template], "mm" - ) - ) - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, [mm_contiguous_subgraph_template], "mm" - ) - ) + templates_to_use.append(decompose_k_subgraph_template) + + templates_to_use.append(mm_contiguous_subgraph_template) + + # Single unified call for all non-autoheuristic templates + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm")) if ( is_nonzero @@ -891,25 +886,25 @@ def tuned_int_mm(mat1, mat2, *, layout=None): # Create MMKernelInputs for Int MM kernel_inputs = MMKernelInputs([mat1, mat2], out_dtype=torch.int32) + + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten__int_mm], - name, - ) - ) + templates_to_use.append(aten__int_mm) + + if is_nonzero and use_triton_template( + layout, enable_int32=True, check_max_autotune=False + ): + templates_to_use.append(mm_template) + + # Single unified call for all templates + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) if use_cutlass and _use_cutlass_for_op(name): CUTLASS3xGemmTemplate.add_cutlass_gemm_choices( choices, layout, kernel_inputs.nodes(), fuseable=True, non_fuseable=True ) - if is_nonzero and use_triton_template( - layout, enable_int32=True, check_max_autotune=False - ): - choices.extend(V.choices.get_mm_configs(kernel_inputs, [mm_template], name)) - return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) @@ -957,50 +952,21 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten_bias_addmm], - name, - ) - ) - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten_addmm], - name, - ) - ) + templates_to_use.extend([aten_bias_addmm, aten_addmm]) if is_nonzero and use_triton_template(layout, check_max_autotune=False): - # all the triton templates use the extra_kwargs - # Get template choices using the new unified function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [mm_template], - name, - ) - ) + templates_to_use.append(mm_template) if use_triton_tma_template(mat1, mat2): - # Get TMA template choices using the new unified 
function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [persistent_tma_mm_template], - name, - ) - ) + templates_to_use.append(persistent_tma_mm_template) - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [addmm_contiguous_subgraph_template], - "addmm", - ) - ) + templates_to_use.append(addmm_contiguous_subgraph_template) + + # Single unified call for all templates + choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) if ( is_nonzero @@ -1155,52 +1121,49 @@ def tuned_scaled_mm( ) choices: list[ChoiceCaller] = [] + + # Collect all templates for unified call + templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] + kwarg_overrides = {} + if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [aten__fp8_mm], - name, - kwarg_overrides={ - aten__fp8_mm.uid: dict( - out_dtype=out_dtype, use_fast_accum=use_fast_accum - ) - }, - ) + templates_to_use.append(aten__fp8_mm) + kwarg_overrides[aten__fp8_mm.uid] = dict( + out_dtype=out_dtype, use_fast_accum=use_fast_accum ) - # We dont have triton lowerings for the MX variants yet - if scale_a.dtype != torch.float32: - return autotune_select_algorithm(name, choices, input_nodes, layout) - _, is_nonzero = _is_static_problem(layout) - if is_nonzero and use_triton_template( - layout, enable_float8=True, check_max_autotune=False + if ( + # We dont have triton lowerings for the MX variants yet + scale_a.dtype == torch.float32 + and is_nonzero + and use_triton_template(layout, enable_float8=True, check_max_autotune=False) ): overriders = dict(USE_FAST_ACCUM=use_fast_accum) + # TODO (paulzhan): There is no template that exists for bias and TMA # Don't run tma template currently if bias exists if use_triton_tma_template(mat_a, mat_b) and not bias: - # Get TMA template choices using the new unified function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [scaled_mm_device_tma_template], - name, - kwarg_overrides={scaled_mm_device_tma_template.uid: overriders}, - ) - ) + templates_to_use.append(scaled_mm_device_tma_template) + kwarg_overrides[scaled_mm_device_tma_template.uid] = overriders - # Get template choices using the new unified function - choices.extend( - V.choices.get_mm_configs( - kernel_inputs, - [mm_template], - name, - kwarg_overrides={mm_template.uid: overriders}, - ) + templates_to_use.append(mm_template) + kwarg_overrides[mm_template.uid] = overriders + + # Single unified call for all templates + choices.extend( + V.choices.get_mm_configs( + kernel_inputs, + templates_to_use, + name, + kwarg_overrides=kwarg_overrides, ) + ) + + # Early return for MX variants + if scale_a.dtype != torch.float32: + return autotune_select_algorithm(name, choices, input_nodes, layout) if ( is_nonzero diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index c27056e5a3227..20140378477a2 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Union import torch @@ -19,6 +19,7 @@ if TYPE_CHECKING: from torch._inductor.ir import ChoiceCaller + from torch._inductor.select_algorithm import KernelTemplate log = logging.getLogger(__name__) @@ -155,16 +156,19 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None): assert layout1 == layout2 # options to tune from choices: list[ChoiceCaller] = [] + + # Collect all templates for unified call 
+ templates_to_use: list[Union[ExternKernelChoice, KernelTemplate]] = [] if use_aten_gemm_kernels(): - choices.extend( - V.choices.get_mm_configs(kernel_inputs, [aten_mm_plus_mm], "mm_plus_mm") - ) + templates_to_use.append(aten_mm_plus_mm) if use_triton_template(layout1, check_max_autotune=False): - # Get template choices using the new unified function - choices.extend( - V.choices.get_mm_configs(kernel_inputs, [mm_plus_mm_template], "mm_plus_mm") - ) + templates_to_use.append(mm_plus_mm_template) + + # Single unified call for all templates + choices.extend( + V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm_plus_mm") + ) return autotune_select_algorithm( "mm_plus_mm", choices, kernel_inputs.nodes(), layout1 From 269c9907a0471b4f7783b9829e8cd24c3d702c48 Mon Sep 17 00:00:00 2001 From: Ruben Rodriguez Buchillon Date: Thu, 11 Sep 2025 15:49:28 -0700 Subject: [PATCH 191/693] [inductor][choices] rename get_mm_configs to get_template_configs (#162293) # why - eventually we want all templates to go through this - we're exposing this through diode as a sort of interface/API - avoid later renaming # what - rename get_mm_configs to get_template_configs - rename _finalize_mm_configs to _finalize_template_configs # testing - lintrunner - ci Differential Revision: [D81820641](https://our.internmc.facebook.com/intern/diff/D81820641) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162293 Approved by: https://github.com/eellison ghstack dependencies: #161351, #161350 --- torch/_inductor/choices.py | 10 +++++----- torch/_inductor/kernel/bmm.py | 6 ++++-- torch/_inductor/kernel/mm.py | 18 ++++++++++++------ torch/_inductor/kernel/mm_plus_mm.py | 2 +- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index 914d1580c6d13..f35b5e254d013 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -106,7 +106,7 @@ def get_flex_decode_configs( flex_heuristics = self.get_config_heuristics(device_type) return flex_heuristics.get_flex_decode_configs(head_dim, dtype) - def _finalize_mm_configs( + def _finalize_template_configs( self, template_choices: dict[str, Generator[KernelTemplateChoice, None, None]], kernel_inputs: KernelInputs, @@ -148,12 +148,12 @@ def get_ktc( """ Utility to get the KernelTemplateChoice generator for a specific input. - This is a per template/op call, whereas get_mm_configs is an op wide call (all templates). + This is a per template/op call, whereas get_template_configs is an op wide call (all templates). 
Consider when overriding/using at which level you need to make decisions """ # Extract device_type from kernel_inputs device_type = kernel_inputs.device_type - assert device_type is not None, "get_mm_configs requires a valid device type" + assert device_type is not None, "get_ktc requires a valid device type" # Extract template_name from the template object template_name = template.uid @@ -233,7 +233,7 @@ def _need_to_fix_layout( not isinstance(ktc.template, ExternKernelChoice) for ktc in adjusted_choices ) - def get_mm_configs( + def get_template_configs( self, kernel_inputs: KernelInputs, templates: list[Union[KernelTemplate, ExternKernelChoice]], @@ -270,7 +270,7 @@ def get_mm_configs( ) # Second pass: Adjust the template choices - adjusted_choices = self._finalize_mm_configs( + adjusted_choices = self._finalize_template_configs( template_choices, kernel_inputs, templates, diff --git a/torch/_inductor/kernel/bmm.py b/torch/_inductor/kernel/bmm.py index 734ab96810743..20d101b951c09 100644 --- a/torch/_inductor/kernel/bmm.py +++ b/torch/_inductor/kernel/bmm.py @@ -214,7 +214,7 @@ def may_require_contiguous(t, meta_t): # Single unified call for all templates choices.extend( - V.choices.get_mm_configs( + V.choices.get_template_configs( kernel_inputs, templates_to_use, name, @@ -290,6 +290,8 @@ def tuned_baddbmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): templates_to_use.append(bmm_template) # Single unified call for all templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_template_configs(kernel_inputs, templates_to_use, name) + ) return autotune_select_algorithm(name, choices, kernel_inputs.nodes(), layout) diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index fc20286887069..24c5c23218ba6 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -770,7 +770,9 @@ def tuned_mm(mat1, mat2, *, layout=None): templates_to_use.append(mm_contiguous_subgraph_template) # Single unified call for all non-autoheuristic templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm")) + choices.extend( + V.choices.get_template_configs(kernel_inputs, templates_to_use, "mm") + ) if ( is_nonzero @@ -805,7 +807,7 @@ def tuned_mm(mat1, mat2, *, layout=None): always_included.append("extern_mm") num_choices_before_extra_configs = len(choices) choices.extend( - V.choices.get_mm_configs( + V.choices.get_template_configs( # TODO(coconutruben): remove once we deprecate ah # mm-extra is a hack to keep the ah functionality alive # while we transition to the unified kwargs retrieval @@ -898,7 +900,9 @@ def tuned_int_mm(mat1, mat2, *, layout=None): templates_to_use.append(mm_template) # Single unified call for all templates - choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_template_configs(kernel_inputs, templates_to_use, name) + ) if use_cutlass and _use_cutlass_for_op(name): CUTLASS3xGemmTemplate.add_cutlass_gemm_choices( @@ -944,7 +948,7 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): [inp, mat1, mat2], scalars=dict(alpha=alpha, beta=beta) ) choices.extend( - V.choices.get_mm_configs( + V.choices.get_template_configs( kernel_inputs, [aten_addmm], name, @@ -966,7 +970,9 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): templates_to_use.append(addmm_contiguous_subgraph_template) # Single unified call for all templates - 
choices.extend(V.choices.get_mm_configs(kernel_inputs, templates_to_use, name)) + choices.extend( + V.choices.get_template_configs(kernel_inputs, templates_to_use, name) + ) if ( is_nonzero @@ -1153,7 +1159,7 @@ def tuned_scaled_mm( # Single unified call for all templates choices.extend( - V.choices.get_mm_configs( + V.choices.get_template_configs( kernel_inputs, templates_to_use, name, diff --git a/torch/_inductor/kernel/mm_plus_mm.py b/torch/_inductor/kernel/mm_plus_mm.py index 20140378477a2..df94e3e5cd7bb 100644 --- a/torch/_inductor/kernel/mm_plus_mm.py +++ b/torch/_inductor/kernel/mm_plus_mm.py @@ -167,7 +167,7 @@ def tuned_mm_plus_mm(mat1, mat2, mat3, mat4, *, layout=None): # Single unified call for all templates choices.extend( - V.choices.get_mm_configs(kernel_inputs, templates_to_use, "mm_plus_mm") + V.choices.get_template_configs(kernel_inputs, templates_to_use, "mm_plus_mm") ) return autotune_select_algorithm( From 25f1a5d8d13f404d7e08d64efce9aa6c411ee143 Mon Sep 17 00:00:00 2001 From: Ruben Rodriguez Buchillon Date: Thu, 11 Sep 2025 15:49:29 -0700 Subject: [PATCH 192/693] [inductor][ez] add src_hash property for Templates (#161468) # why enable caching/overriding/filtering based on src hash later # what - KernelTemplate has a src_hash that is None by default - sha256 on TritonTemplate of the template src code - None on ExternKernelChoice to have same API # testing n/a (not in use in this change) Differential Revision: [](https://our.internmc.facebook.com/intern/diff/) Differential Revision: [D81821149](https://our.internmc.facebook.com/intern/diff/D81821149) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161468 Approved by: https://github.com/eellison ghstack dependencies: #161351, #161350, #162293 --- torch/_inductor/codegen/common.py | 14 +++++++++++++- torch/_inductor/kernel_template_choice.py | 2 +- torch/_inductor/select_algorithm.py | 7 ++++++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/torch/_inductor/codegen/common.py b/torch/_inductor/codegen/common.py index 9802358b02eee..0023987577387 100644 --- a/torch/_inductor/codegen/common.py +++ b/torch/_inductor/codegen/common.py @@ -2407,8 +2407,9 @@ def get_dtype(name: str) -> torch.dtype: return get_dtype - def __init__(self, name: str) -> None: + def __init__(self, name: str, hash: Optional[str] = None) -> None: self.name = name + self._hash = hash @property def uid(self) -> str: @@ -2421,6 +2422,17 @@ def uid(self) -> str: # TODO(coconutruben): add some central registration to assert on global uniqueness return self.name + @property + def src_hash(self) -> Union[str, None]: + """ + source hash for a Template. + + Templates can optionally provide a src hash to make it easier to cache/validate that + a template has not changed from one version to another. Override this if that detection + is different for your specific Template + """ + return self._hash + def choice_or_none(self, **kwargs: Any) -> Optional[ChoiceCaller]: """ Maybe generates a new ChoiceCaller and returns it, or None if generation fails. diff --git a/torch/_inductor/kernel_template_choice.py b/torch/_inductor/kernel_template_choice.py index ac42eaf5b95b0..023428fbf5fd0 100644 --- a/torch/_inductor/kernel_template_choice.py +++ b/torch/_inductor/kernel_template_choice.py @@ -39,7 +39,7 @@ def choice(self) -> Optional[ChoiceCaller]: """ Lazily evaluate and return the ChoiceCaller for this template choice. - On first access, calls template.choice_or_None() with the stored parameters. 
+ On first access, calls template.choice_or_none() with the stored parameters. If successful, caches and returns the ChoiceCaller. If it fails, caches and returns None. Subsequent accesses return the cached value. diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index b2bdef0f7ef8d..70dc4fc78a292 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -2,6 +2,7 @@ import contextlib import dataclasses import functools +import hashlib import inspect import itertools import json @@ -1433,7 +1434,7 @@ def __init__( cache_codegen_enabled_for_template=False, prologue_loads_all_inputs=False, ) -> None: - super().__init__(name) + super().__init__(name, hash=hashlib.sha256(source.encode("utf-8")).hexdigest()) self.grid = grid self.template = self._template_from_string(source) assert name not in self.all_templates, "duplicate template name" @@ -1888,6 +1889,10 @@ def __init__( self.op_overload = op_overload self.use_fallback_kernel = use_fallback_kernel self.kernel_creator = kernel_creator + # match the API for KernelTemplate as they can be treated the same + # There is no src hash for ExternKernelChoice in the traditional sense + # so we indicate this by returning None + self.src_hash = None def to_callable(self): return getattr(extern_kernels, self.name) From 0dcd9304aa0ea404c2807cb058660e49c9810c20 Mon Sep 17 00:00:00 2001 From: Haifeng Jin Date: Fri, 12 Sep 2025 21:48:15 +0000 Subject: [PATCH 193/693] fix high=0 bug in nll_loss test (#162763) Minor bug fix for the `nll_loss` test. Before this PR, it runs `torch.randint(high=0)`, which will fail because it would try to generate a number that >= low and < high, i.e. x>=0 and x<0. The test did not fail because that line is not run when testing on CPU because it failed earlier because of a unsupported dtype. However, as we support TPUs at Google, this line is reached first before the dtype check, which triggers the bug. To my understanding, these OpInfo should be general enough to support different hardware. Fixing this obvious bug would make it more general cross different hardware. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162763 Approved by: https://github.com/soulitzer --- torch/testing/_internal/common_methods_invocations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 80f539455fce2..080c95bc7d2f0 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -8468,7 +8468,8 @@ def gen_shape_kwargs(): yield make_input(s), make_target(s), dict(reduction=reduction) yield make_input(s), make_target(s), dict(weight=make_weight(), reduction=reduction) yield make_input(s), make_target(s), dict(weight=make_weight(low=0), reduction=reduction) - yield make_input(s), make_target(s), dict(weight=make_weight(high=0), reduction=reduction) + if dtype.is_floating_point or dtype.is_complex: + yield make_input(s), make_target(s), dict(weight=make_weight(high=0), reduction=reduction) t = make_target(s) ignore = num_classes // 2 # If "mean", nll returns NaN, so it's not differentiable at those points From ee53ad2dd0a864cd331241cd3c4e131da1ceb868 Mon Sep 17 00:00:00 2001 From: Dmitry Rogozhkin Date: Fri, 12 Sep 2025 21:56:58 +0000 Subject: [PATCH 194/693] xpu: test py_limited_api with SyclExtension (#162546) Commit extends existing CUDA test to cover XPU SyclExtension case for the same feature - `py_limited_api`. Commit required a fix for xpu to install some Aten header files (#145902) which got resolved after the merge of #159621. See: https://github.com/pytorch/pytorch/issues/145902 Requires: https://github.com/pytorch/pytorch/pull/159621 Requires: https://github.com/intel/torch-xpu-ops/pull/1743 CC: @guangyey, @EikanWang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162546 Approved by: https://github.com/guangyey, https://github.com/EikanWang, https://github.com/janeyx99 --- .../python_agnostic/__init__.py | 13 ++++++++--- .../python_agnostic/csrc/ultra_norm.sycl | 19 ++++++++++++++++ .../python_agnostic_extension/setup.py | 14 +++++++++--- .../test/test_python_agnostic.py | 22 ++++++++++++++----- test/run_test.py | 4 +++- 5 files changed, 59 insertions(+), 13 deletions(-) create mode 100644 test/cpp_extensions/python_agnostic_extension/python_agnostic/csrc/ultra_norm.sycl diff --git a/test/cpp_extensions/python_agnostic_extension/python_agnostic/__init__.py b/test/cpp_extensions/python_agnostic_extension/python_agnostic/__init__.py index b416bca020672..1fec358448064 100644 --- a/test/cpp_extensions/python_agnostic_extension/python_agnostic/__init__.py +++ b/test/cpp_extensions/python_agnostic_extension/python_agnostic/__init__.py @@ -15,12 +15,19 @@ # The following is used to assert the ultra_norm op is properly loaded and # calculates correct results upon import of this extension. 
+if torch.cuda.is_available(): + device = "cuda" +elif torch.xpu.is_available(): + device = "xpu" +else: + raise AssertionError("Expected CUDA or XPU device backend, found none") + inputs = [ - torch.tensor([1.0, 2.0, 3.0], device="cuda"), - torch.tensor([-4.0, -5.0, -6.0], device="cuda"), + torch.tensor([1.0, 2.0, 3.0], device=device), + torch.tensor([-4.0, -5.0, -6.0], device=device), ] assert torch.equal( ops.ultra_norm(inputs), - torch.norm(torch.tensor([1.0, 2.0, 3.0, -4.0, -5.0, -6.0], device="cuda")), + torch.norm(torch.tensor([1.0, 2.0, 3.0, -4.0, -5.0, -6.0], device=device)), ) diff --git a/test/cpp_extensions/python_agnostic_extension/python_agnostic/csrc/ultra_norm.sycl b/test/cpp_extensions/python_agnostic_extension/python_agnostic/csrc/ultra_norm.sycl new file mode 100644 index 0000000000000..e4557cbf59b2e --- /dev/null +++ b/test/cpp_extensions/python_agnostic_extension/python_agnostic/csrc/ultra_norm.sycl @@ -0,0 +1,19 @@ +#include +#include +#include +#include +#include + +at::Tensor ultra_norm(at::TensorList inputs) { + auto res = at::native::foreach_tensor_norm_xpu(inputs); + std::vector unsqueezed; + for (const auto& scalar_tensor : res) { + unsqueezed.push_back(at::unsqueeze(scalar_tensor, 0)); + } + auto stacked = at::xpu::cat(unsqueezed); + return at::xpu::norm(stacked, 2, at::IntArrayRef{}, false); +} + +TORCH_LIBRARY_IMPL(python_agnostic, XPU, m) { + m.impl("python_agnostic::ultra_norm", &ultra_norm); +} diff --git a/test/cpp_extensions/python_agnostic_extension/setup.py b/test/cpp_extensions/python_agnostic_extension/setup.py index c81ec9aec41dc..007e0ac689942 100644 --- a/test/cpp_extensions/python_agnostic_extension/setup.py +++ b/test/cpp_extensions/python_agnostic_extension/setup.py @@ -9,7 +9,8 @@ from setuptools import setup -from torch.utils.cpp_extension import BuildExtension, CUDAExtension +import torch +from torch.utils.cpp_extension import BuildExtension, CUDAExtension, SyclExtension ROOT_DIR = Path(__file__).parent @@ -40,10 +41,17 @@ def get_extension(): "cxx": ["-fdiagnostics-color=always"], } - sources = list(CSRC_DIR.glob("**/*.cu")) + if torch.cuda.is_available(): + sources = list(CSRC_DIR.glob("**/*.cu")) + extension = CUDAExtension + elif torch.xpu.is_available(): + sources = list(CSRC_DIR.glob("**/*.sycl")) + extension = SyclExtension + else: + raise AssertionError("Expected CUDA or XPU device backend, found none") return [ - CUDAExtension( + extension( "python_agnostic._C", sources=sorted(str(s) for s in sources), py_limited_api=True, diff --git a/test/cpp_extensions/python_agnostic_extension/test/test_python_agnostic.py b/test/cpp_extensions/python_agnostic_extension/test/test_python_agnostic.py index a64ddc8e440e9..58a8dafb305a3 100644 --- a/test/cpp_extensions/python_agnostic_extension/test/test_python_agnostic.py +++ b/test/cpp_extensions/python_agnostic_extension/test/test_python_agnostic.py @@ -7,11 +7,15 @@ import unittest from pathlib import Path -from torch.testing._internal.common_device_type import ( - instantiate_device_type_tests, - onlyCUDA, +from torch.testing._internal.common_cuda import TEST_CUDA +from torch.testing._internal.common_device_type import instantiate_device_type_tests +from torch.testing._internal.common_utils import ( + IS_LINUX, + run_tests, + shell, + TEST_XPU, + TestCase, ) -from torch.testing._internal.common_utils import IS_LINUX, run_tests, shell, TestCase class TestPythonAgnostic(TestCase): @@ -29,7 +33,10 @@ def setUpClass(cls): if return_code != 0: raise RuntimeError("python_agnostic bdist_wheel failed to 
build") - @onlyCUDA + @unittest.skipIf( + not (TEST_CUDA or TEST_XPU), + "test requires CUDA or XPU", + ) @unittest.skipIf(not IS_LINUX, "test requires linux tools ldd and nm") def test_extension_is_python_agnostic(self, device): # For this test, run_test.py will call `python setup.py bdist_wheel` in the @@ -59,7 +66,10 @@ def test_extension_is_python_agnostic(self, device): self.assertFalse("Py" in missing_symbols) -instantiate_device_type_tests(TestPythonAgnostic, globals(), only_for="cuda") +devices = ("cuda", "xpu") +instantiate_device_type_tests( + TestPythonAgnostic, globals(), only_for=devices, allow_xpu=True +) if __name__ == "__main__": run_tests() diff --git a/test/run_test.py b/test/run_test.py index 01619aa5e7f76..7276baecabf83 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -38,6 +38,7 @@ TEST_WITH_ASAN, TEST_WITH_ROCM, TEST_WITH_SLOW_GRADCHECK, + TEST_XPU, ) @@ -841,8 +842,9 @@ def _test_cpp_extensions_aot(test_directory, options, use_ninja): exts_to_build = [ (install_cmd, "no_python_abi_suffix_test"), ] - if TEST_CUDA: + if TEST_CUDA or TEST_XPU: exts_to_build.append((wheel_cmd, "python_agnostic_extension")) + if TEST_CUDA: exts_to_build.append((install_cmd, "libtorch_agnostic_extension")) for cmd, extension_dir in exts_to_build: return_code = shell( From d25c35d2b27dad5b59d69a749da32a42bbec1625 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Fri, 12 Sep 2025 13:33:49 -0700 Subject: [PATCH 195/693] [MPS] Fix `[nan]median` output for empty tensors (#162846) It should be `NaN` rather than 0 Added respective checks to `test_empty_tensor` Fixes https://github.com/pytorch/pytorch/issues/162798 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162846 Approved by: https://github.com/dcci --- aten/src/ATen/native/mps/operations/ReduceOps.mm | 1 + test/test_mps.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/aten/src/ATen/native/mps/operations/ReduceOps.mm b/aten/src/ATen/native/mps/operations/ReduceOps.mm index ae13504d9003e..4ace191b73b88 100644 --- a/aten/src/ATen/native/mps/operations/ReduceOps.mm +++ b/aten/src/ATen/native/mps/operations/ReduceOps.mm @@ -617,6 +617,7 @@ static Tensor median_common_mps(const Tensor& input_t, bool nanmedian) { // we allocate 1 here due to MacOS13 bug for gather MPSGraph op, look below for the error Tensor output_t = at::empty({1}, input_t.scalar_type(), std::nullopt, kMPS, std::nullopt, std::nullopt); if (output_t.numel() == 0 || num_in_elements == 0) { + output_t.fill_(std::numeric_limits::quiet_NaN()); return output_t; } diff --git a/test/test_mps.py b/test/test_mps.py index 756b2cd20567a..b29d24ee32386 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ -11628,6 +11628,9 @@ def test_empty_slice(self, device="mps"): def test_empty_reduce(self, device="mps"): x = torch.rand(0, 3, device=device) self.assertTrue(x.mean().isnan()) + self.assertTrue(x.nanmean().isnan()) + self.assertTrue(x.median().isnan()) + self.assertTrue(x.nanmedian().isnan()) self.assertEqual(x.count_nonzero(), 0) self.assertEqual(x.sum(), 0) self.assertEqual(x.nansum(), 0) From cdfa298a3b32df6cfd09348755f4202148830b78 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Fri, 12 Sep 2025 23:52:43 +0000 Subject: [PATCH 196/693] Revert "[MTIA Runtime] Add foreach_div ops to native_functions.yaml (#162732)" This reverts commit a3f01f6418667f791f36d928f7e912eb89be2e67. 
Reverted https://github.com/pytorch/pytorch/pull/162732 on behalf of https://github.com/huydhn due to Reverted internally ([comment](https://github.com/pytorch/pytorch/pull/162732#issuecomment-3287163750)) --- aten/src/ATen/native/native_functions.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index e2b8e5ac0b8a5..4a5c4ac51558b 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -10699,7 +10699,6 @@ dispatch: CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow CUDA: foreach_tensor_div_list_kernel_cuda - MTIA: foreach_tensor_div_list_kernel_mtia - func: _foreach_div_.List(Tensor(a!)[] self, Tensor[] other) -> () device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices @@ -10707,7 +10706,6 @@ dispatch: CompositeExplicitAutograd: foreach_tensor_div_list_kernel_slow_ CUDA: foreach_tensor_div_list_kernel_cuda_ - MTIA: foreach_tensor_div_list_kernel_mtia_ autogen: _foreach_div.List_out - func: _foreach_div.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[] @@ -10731,7 +10729,6 @@ dispatch: CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow CUDA: foreach_tensor_div_tensor_kernel_cuda - MTIA: foreach_tensor_div_tensor_kernel_mtia - func: _foreach_div_.Tensor(Tensor(a!)[] self, Tensor other) -> () device_check: NoCheck # foreach kernels fall back to slow path when tensor are on different devices @@ -10739,7 +10736,6 @@ dispatch: CompositeExplicitAutograd: foreach_tensor_div_tensor_kernel_slow_ CUDA: foreach_tensor_div_tensor_kernel_cuda_ - MTIA: foreach_tensor_div_tensor_kernel_mtia_ autogen: _foreach_div.Tensor_out - func: _foreach_clamp_max.Scalar(Tensor[] self, Scalar scalar) -> Tensor[] From 9b429846e87ba72464fbe51d1057f77e5c3922d2 Mon Sep 17 00:00:00 2001 From: FFFrog Date: Sat, 13 Sep 2025 02:06:24 +0800 Subject: [PATCH 197/693] [OpenReg] Migrate OpenReg Tests from tests/test_openreg.py into torch_openreg/tests (#161917) **Background:** Almost all the tests in `test/test_openreg.py` are designed for `torch_openreg`, so placing these testcases in the test directory is not a good idea. Instead, they should be moved to the `tests` directory under `torch_openreg`, coordinating these tests with their corresponding functional logic. **How to do:** So how do we verify the quality of the third-party device integration mechanism? We will maintain a `test_openreg` entrypoint in `test/run_test.py`. This entrypoint will install `torch_openreg` and run all the testcases located in `torch_openreg`. As long as all testcases pass, we can guarantee that the out-of-tree backend integration mechanism is available. **Next:** We will also improve `torch_openreg's` test coverage in the future. 
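A minimal sketch of the `test_openreg` entrypoint flow described above, under the assumption that it amounts to installing the extension and then running pytest over its `tests` directory (the function name and exact commands here are hypothetical; the real entrypoint lives in the `run_test.py` change of this patch):

```python
import subprocess
import sys
from pathlib import Path

# Location of the out-of-tree backend used to exercise the integration mechanism.
OPENREG_DIR = Path("test/cpp_extensions/open_registration_extension/torch_openreg")


def run_test_openreg() -> int:
    # Build and install torch_openreg against the current torch build.
    subprocess.check_call([sys.executable, "-m", "pip", "install", str(OPENREG_DIR)])
    # Run every testcase that lives next to the backend implementation.
    return subprocess.call([sys.executable, "-m", "pytest", "-v", str(OPENREG_DIR / "tests")])


if __name__ == "__main__":
    sys.exit(run_test_openreg())
```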
Pull Request resolved: https://github.com/pytorch/pytorch/pull/161917 Approved by: https://github.com/albanD --- .../torch_openreg/tests/test_autograd.py | 42 ++ .../torch_openreg/tests/test_event.py | 40 ++ .../torch_openreg/tests/test_memory.py | 31 + .../torch_openreg/tests/test_misc.py | 162 +++++ .../torch_openreg/tests/test_ops.py | 291 ++++++++ .../torch_openreg/tests/test_rng.py | 23 + .../torch_openreg/tests/test_storage.py | 174 +++++ .../torch_openreg/tests/test_streams.py | 27 + .../torch_openreg/tests/test_utils.py | 20 + test/run_test.py | 21 +- test/test_openreg.py | 629 ------------------ test/test_transformers_privateuse1.py | 98 --- tools/testing/discover_tests.py | 2 + 13 files changed, 829 insertions(+), 731 deletions(-) create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_autograd.py create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_event.py create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_memory.py create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_misc.py create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_ops.py create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_rng.py create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_storage.py create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_streams.py create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_utils.py delete mode 100644 test/test_openreg.py delete mode 100644 test/test_transformers_privateuse1.py diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_autograd.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_autograd.py new file mode 100644 index 0000000000000..6b58094e3fde4 --- /dev/null +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_autograd.py @@ -0,0 +1,42 @@ +# Owner(s): ["module: PrivateUse1"] + +import os + +import psutil + +import torch +from torch.testing._internal.common_utils import ( + run_tests, + skipIfMPS, + skipIfTorchDynamo, + skipIfWindows, + TestCase, +) + + +class TestAutograd(TestCase): + # Support MPS and Windows platform later and fix torchdynamo issue + @skipIfMPS + @skipIfWindows() + @skipIfTorchDynamo() + def test_autograd_init(self): + # Make sure autograd is initialized + torch.ones(2, requires_grad=True, device="openreg").sum().backward() + + pid = os.getpid() + task_path = f"/proc/{pid}/task" + all_threads = psutil.Process(pid).threads() + + all_thread_names = set() + + for t in all_threads: + with open(f"{task_path}/{t.id}/comm") as file: + thread_name = file.read().strip() + all_thread_names.add(thread_name) + + for i in range(torch.accelerator.device_count()): + self.assertIn(f"pt_autograd_{i}", all_thread_names) + + +if __name__ == "__main__": + run_tests() diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_event.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_event.py new file mode 100644 index 0000000000000..c381b623839c0 --- /dev/null +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_event.py @@ -0,0 +1,40 @@ +# Owner(s): ["module: PrivateUse1"] + +import torch +from torch.testing._internal.common_utils import run_tests, 
skipIfTorchDynamo, TestCase + + +class TestEvent(TestCase): + @skipIfTorchDynamo() + def test_record_event(self): + stream = torch.Stream(device="openreg:1") + event1 = stream.record_event() + self.assertNotEqual(0, event1.event_id) + event2 = stream.record_event() + self.assertNotEqual(0, event2.event_id) + self.assertNotEqual(event1.event_id, event2.event_id) + + @skipIfTorchDynamo() + def test_event_elapsed_time(self): + stream = torch.Stream(device="openreg:1") + e1 = torch.Event(device="openreg:1", enable_timing=True) + e1.record(stream) + e2 = torch.Event(device="openreg:1", enable_timing=True) + e2.record(stream) + + e2.synchronize() + self.assertTrue(e2.query()) + + ms = e1.elapsed_time(e2) + self.assertTrue(ms > 0) + + @skipIfTorchDynamo() + def test_event_wait_stream(self): + s1 = torch.Stream(device="openreg") + s2 = torch.Stream(device="openreg") + e1 = s1.record_event() + e1.wait(s2) + + +if __name__ == "__main__": + run_tests() diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_memory.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_memory.py new file mode 100644 index 0000000000000..3d67e16a0f503 --- /dev/null +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_memory.py @@ -0,0 +1,31 @@ +# Owner(s): ["module: PrivateUse1"] + +import torch +from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo, TestCase + + +class TestPinMemory(TestCase): + @skipIfTorchDynamo("unsupported aten.is_pinned.default") + def test_pin_memory(self): + tensor = torch.randn(10) + self.assertFalse(tensor.is_pinned()) + pinned_tensor = tensor.pin_memory() + self.assertTrue(pinned_tensor.is_pinned()) + slice_tensor = pinned_tensor[2:5] + self.assertTrue(slice_tensor.is_pinned()) + + tensor = torch.randn(10) + storage = tensor.storage() + self.assertFalse(storage.is_pinned("openreg")) + pinned_storage = storage.pin_memory("openreg") + self.assertTrue(pinned_storage.is_pinned("openreg")) + + tensor = torch.randn(10) + untyped_storage = tensor.untyped_storage() + self.assertFalse(untyped_storage.is_pinned("openreg")) + pinned_untyped_storage = untyped_storage.pin_memory("openreg") + self.assertTrue(pinned_untyped_storage.is_pinned("openreg")) + + +if __name__ == "__main__": + run_tests() diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_misc.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_misc.py new file mode 100644 index 0000000000000..11d29fe70bba0 --- /dev/null +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_misc.py @@ -0,0 +1,162 @@ +# Owner(s): ["module: PrivateUse1"] + +import types +import unittest + +import torch +from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo, TestCase + + +class TestBackendModule(TestCase): + def test_backend_module_name(self): + self.assertEqual(torch._C._get_privateuse1_backend_name(), "openreg") + # backend can be renamed to the same name multiple times + torch.utils.rename_privateuse1_backend("openreg") + with self.assertRaisesRegex(RuntimeError, "has already been set"): + torch.utils.rename_privateuse1_backend("dev") + + def test_backend_module_registration(self): + def generate_faked_module(): + return types.ModuleType("fake_module") + + with self.assertRaisesRegex(RuntimeError, "Expected one of cpu"): + torch._register_device_module("dev", generate_faked_module()) + with self.assertRaisesRegex(RuntimeError, "The runtime module of"): + 
torch._register_device_module("openreg", generate_faked_module()) + + def test_backend_module_function(self): + with self.assertRaisesRegex(RuntimeError, "Try to call torch.openreg"): + torch.utils.backend_registration._get_custom_mod_func("func_name_") + self.assertTrue( + torch.utils.backend_registration._get_custom_mod_func("device_count")() == 2 + ) + + +class TestBackendProperty(TestCase): + def test_backend_generate_methods(self): + with self.assertRaisesRegex(RuntimeError, "The custom device module of"): + torch.utils.generate_methods_for_privateuse1_backend() + + self.assertTrue(hasattr(torch.Tensor, "is_openreg")) + self.assertTrue(hasattr(torch.Tensor, "openreg")) + self.assertTrue(hasattr(torch.TypedStorage, "is_openreg")) + self.assertTrue(hasattr(torch.TypedStorage, "openreg")) + self.assertTrue(hasattr(torch.UntypedStorage, "is_openreg")) + self.assertTrue(hasattr(torch.UntypedStorage, "openreg")) + self.assertTrue(hasattr(torch.nn.Module, "openreg")) + self.assertTrue(hasattr(torch.nn.utils.rnn.PackedSequence, "is_openreg")) + self.assertTrue(hasattr(torch.nn.utils.rnn.PackedSequence, "openreg")) + + def test_backend_tensor_methods(self): + x = torch.empty(4, 4) + self.assertFalse(x.is_openreg) + + y = x.openreg(torch.device("openreg")) + self.assertTrue(y.is_openreg) + z = x.openreg(torch.device("openreg:0")) + self.assertTrue(z.is_openreg) + n = x.openreg(0) + self.assertTrue(n.is_openreg) + + @unittest.skip("Need to support Parameter in openreg") + def test_backend_module_methods(self): + class FakeModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.x = torch.nn.Parameter(torch.randn(3, 3)) + + def forward(self): + pass + + module = FakeModule() + self.assertEqual(module.x.device.type, "cpu") + module.openreg() # type: ignore[misc] + self.assertEqual(module.x.device.type, "openreg") + + @unittest.skip("Need to support untyped_storage in openreg") + def test_backend_storage_methods(self): + x = torch.empty(4, 4) + + x_cpu = x.storage() + self.assertFalse(x_cpu.is_openreg) + x_openreg = x_cpu.openreg() + self.assertTrue(x_openreg.is_openreg) + + y = torch.empty(4, 4) + + y_cpu = y.untyped_storage() + self.assertFalse(y_cpu.is_openreg) + y_openreg = y_cpu.openreg() + self.assertTrue(y_openreg.is_openreg) + + def test_backend_packed_sequence_methods(self): + x = torch.rand(5, 3) + y = torch.tensor([1, 1, 1, 1, 1]) + + z_cpu = torch.nn.utils.rnn.PackedSequence(x, y) + self.assertFalse(z_cpu.is_openreg) + + z_openreg = z_cpu.openreg() + self.assertTrue(z_openreg.is_openreg) + + +class TestTensorType(TestCase): + def test_backend_tensor_type(self): + dtypes_map = { + torch.bool: "torch.openreg.BoolTensor", + torch.double: "torch.openreg.DoubleTensor", + torch.float32: "torch.openreg.FloatTensor", + torch.half: "torch.openreg.HalfTensor", + torch.int32: "torch.openreg.IntTensor", + torch.int64: "torch.openreg.LongTensor", + torch.int8: "torch.openreg.CharTensor", + torch.short: "torch.openreg.ShortTensor", + torch.uint8: "torch.openreg.ByteTensor", + } + + for dtype, str in dtypes_map.items(): + x = torch.empty(4, 4, dtype=dtype, device="openreg") + self.assertTrue(x.type() == str) + + # Note that all dtype-d Tensor objects here are only for legacy reasons + # and should NOT be used. 
+ @skipIfTorchDynamo() + def test_backend_type_methods(self): + # Tensor + tensor_cpu = torch.randn([8]).float() + self.assertEqual(tensor_cpu.type(), "torch.FloatTensor") + + tensor_openreg = tensor_cpu.openreg() + self.assertEqual(tensor_openreg.type(), "torch.openreg.FloatTensor") + + # Storage + storage_cpu = tensor_cpu.storage() + self.assertEqual(storage_cpu.type(), "torch.FloatStorage") + + tensor_openreg = tensor_cpu.openreg() + storage_openreg = tensor_openreg.storage() + self.assertEqual(storage_openreg.type(), "torch.storage.TypedStorage") + + class CustomFloatStorage: + @property + def __module__(self): + return "torch." + torch._C._get_privateuse1_backend_name() + + @property + def __name__(self): + return "FloatStorage" + + try: + torch.openreg.FloatStorage = CustomFloatStorage() + self.assertEqual(storage_openreg.type(), "torch.openreg.FloatStorage") + + # test custom int storage after defining FloatStorage + tensor_openreg = tensor_cpu.int().openreg() + storage_openreg = tensor_openreg.storage() + self.assertEqual(storage_openreg.type(), "torch.storage.TypedStorage") + finally: + torch.openreg.FloatStorage = None + + +if __name__ == "__main__": + run_tests() diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_ops.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_ops.py new file mode 100644 index 0000000000000..903a946ea32eb --- /dev/null +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_ops.py @@ -0,0 +1,291 @@ +# Owner(s): ["module: PrivateUse1"] + +import collections +import functools +import unittest + +import torch +from torch.nn.attention import SDPBackend +from torch.testing._internal.common_nn import NNTestCase +from torch.testing._internal.common_utils import ( + run_tests, + skipIfTorchDynamo, + skipIfXpu, + TEST_XPU, + TestCase, +) + + +SDPAShape = collections.namedtuple( + "Sdpa_Shape", ["batch", "num_heads", "seq_len", "head_dim"] +) + + +class TestFactory(TestCase): + def test_empty(self): + x = torch.empty(3, device="openreg") + self.assertEqual(x.device.type, "openreg") + self.assertEqual(x.shape, torch.Size([3])) + + x = torch.empty([2, 3, 4, 5], device="openreg", names=["N", "C", "H", "W"]) + self.assertEqual(x.device.type, "openreg") + self.assertEqual(x.shape, torch.Size([2, 3, 4, 5])) + + with torch._subclasses.fake_tensor.FakeTensorMode(): + x = torch.empty(3, 3, device="openreg") + y = torch.empty(3, 3, device="openreg:0") + z = x + y + self.assertEqual(z.device.type, "openreg") + self.assertEqual(z.shape, torch.Size([3, 3])) + + def test_zeros(self): + y = torch.zeros(3, device="openreg") + self.assertEqual(y.device.type, "openreg") + self.assertEqual(y.shape, torch.Size([3])) + + def test_tensor(self): + z = torch.tensor((), device="openreg") + self.assertEqual(z.device.type, "openreg") + self.assertEqual(z.shape, torch.Size([0])) + + +class TestCopy(TestCase): + def test_copy_same_device(self): + a = torch.ones(10, device="openreg").clone() + self.assertEqual(a, torch.ones(10, device="openreg")) + + def test_cross_device_copy(self): + a = torch.rand(10) + b = a.to(device="openreg").add(2).to(device="cpu") + self.assertEqual(b, a + 2) + + def test_cross_diff_devices_copy(self): + a = torch.ones(10, device="openreg:0").to(device="openreg:1").to(device="cpu") + self.assertEqual(a, torch.ones(10)) + + +class TestOps(TestCase): + def test_masked_select(self): + tensor_cpu = torch.randn(10) + tensor_openreg = tensor_cpu.to(device="openreg") + mask = 
tensor_openreg.gt(0) + out = torch.masked_select(tensor_openreg, mask) + + self.assertEqual(out, tensor_cpu.masked_select(tensor_cpu.gt(0))) + + def test_expand(self): + x = torch.tensor([[1], [2], [3]], device="openreg") + y = x.expand(3, 2) + self.assertEqual(y.to(device="cpu"), torch.tensor([[1, 1], [2, 2], [3, 3]])) + self.assertEqual(x.data_ptr(), y.data_ptr()) + + def test_resize(self): + tensor_cpu = torch.randn([4, 4]) + + tensor_openreg = tensor_cpu.openreg() + self.assertTrue(tensor_openreg.size() == torch.Size([4, 4])) + + storage_openreg = tensor_openreg.storage() + self.assertTrue(storage_openreg.size() == 16) + + tensor_openreg.resize_(2, 2, 2, 2) + self.assertTrue(tensor_openreg.size() == torch.Size([2, 2, 2, 2])) + + storage_openreg = tensor_openreg.storage() + self.assertTrue(storage_openreg.size() == 16) + + def test_printing(self): + a = torch.ones(20, device="openreg") + print(a) + + +class TestSTUB(TestCase): + def test_backend_dispatchstub(self): + x_cpu = torch.randn(2, 2, 3, dtype=torch.float32, device="cpu") + x_openreg = x_cpu.to("openreg") + + y_cpu = torch.abs(x_cpu) + y_openreg = torch.abs(x_openreg) + self.assertEqual(y_cpu, y_openreg.cpu()) + + o_cpu = torch.randn(2, 2, 6, dtype=torch.float32, device="cpu") + o_openreg = o_cpu.to("openreg") + # output operand with resize flag is False in TensorIterator. + torch.abs(x_cpu, out=o_cpu[:, :, 0:6:2]) + torch.abs(x_openreg, out=o_openreg[:, :, 0:6:2]) + self.assertEqual(o_cpu, o_openreg.cpu()) + + # output operand with resize flag is True in TensorIterator and + # convert output to contiguous tensor in TensorIterator. + torch.abs(x_cpu, out=o_cpu[:, :, 0:6:3]) + torch.abs(x_openreg, out=o_openreg[:, :, 0:6:3]) + self.assertEqual(o_cpu, o_openreg.cpu()) + + +class TestQuantization(TestCase): + @skipIfXpu(msg="missing kernel for openreg") + def test_quantize(self): + x = torch.randn(3, 4, 5, dtype=torch.float32, device="openreg") + quantized_tensor = torch.quantize_per_tensor(x, 0.1, 10, torch.qint8) + self.assertEqual(quantized_tensor.device, torch.device("openreg:0")) + self.assertEqual(quantized_tensor.dtype, torch.qint8) + + +class TestAutogradFunction(TestCase): + def test_compile_autograd_function_returns_self(self): + in_ref = torch.randn(4, device="openreg", requires_grad=True) + out_ref = torch.ops.openreg.custom_autograd_fn_returns_self(in_ref) + out_ref.sum().backward() + + in_test = in_ref.detach().clone().requires_grad_(True) + # TODO(FFFrog): Need to support inductor for OpenReg first. + out_test = torch.compile(backend="aot_eager")( + torch.ops.openreg.custom_autograd_fn_returns_self + )(in_test) + out_test.sum().backward() + + self.assertEqual(out_ref, out_test) + self.assertEqual(in_ref.grad, in_test.grad) + + @skipIfTorchDynamo("Temporary disabled due to torch._ops.OpOverloadPacket") + def test_compile_autograd_function_aliasing(self): + in_ref = torch.randn(4, device="openreg", requires_grad=True) + out_ref = torch.ops.openreg.custom_autograd_fn_aliasing(in_ref) + out_ref.sum().backward() + + in_test = in_ref.detach().clone().requires_grad_(True) + # TODO(FFFrog): Need to support inductor for OpenReg first. 
+ out_test = torch.compile(backend="aot_eager")( + torch.ops.openreg.custom_autograd_fn_aliasing + )(in_test) + out_test.sum().backward() + + self.assertEqual(out_ref, out_test) + self.assertEqual(in_ref.grad, in_test.grad) + + +class TestFallback(TestCase): + def test_scalar_type_fallback(self): + x_cpu = torch.Tensor([[0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2]]).to(torch.int64) + x = torch.triu_indices(3, 3, device="openreg") + self.assertEqual(x_cpu, x) + + def test_tensor_type_fallback(self): + x = torch.Tensor([[1, 2, 3], [2, 3, 4]]).to("openreg") + y = torch.Tensor([1, 0, 2]).to("openreg") + self.assertTrue(x.device.type, "openreg") + self.assertFalse(x.is_cpu) + + z_cpu = torch.Tensor([[0, 2, 1], [1, 3, 2]]) + # call sub op, which will fallback to cpu + z = torch.sub(x, y) + self.assertEqual(z_cpu, z) + + # call index op, which will fallback to cpu + z_cpu = torch.Tensor([3, 1]) + y = torch.Tensor([1, 0]).long().to("openreg") + z = x[y, y] + self.assertEqual(z_cpu, z) + + def test_tensorlist_type_fallback(self): + # create tensors located in custom device + v_openreg = torch.Tensor([1, 2, 3]).to("openreg") + # create result tensor located in cpu + z_cpu = torch.Tensor([2, 4, 6]) + # create tensorlist for foreach_add op + x = (v_openreg, v_openreg) + y = (v_openreg, v_openreg) + + # Check that our device is correct. + self.assertTrue(v_openreg.device.type == "openreg") + self.assertFalse(v_openreg.is_cpu) + + # call _foreach_add op, which will fallback to cpu + z = torch._foreach_add(x, y) + self.assertEqual(z_cpu, z[0]) + self.assertEqual(z_cpu, z[1]) + + +@unittest.skipIf(TEST_XPU, "XPU does not support cppextension currently") +class TestSDPA(NNTestCase): + @skipIfTorchDynamo() + def test_fused_sdp_choice_privateuseone(self): + batch_size, seq_len, num_heads, head_dim = 4, 256, 2, 128 + make_tensor = functools.partial(torch.rand, device="cpu", dtype=torch.float16) + shape = SDPAShape(batch_size, num_heads, seq_len, head_dim) + q_cpu, k_cpu, v_cpu = make_tensor(shape), make_tensor(shape), make_tensor(shape) + q_privateuse1 = q_cpu.to("openreg") + k_privateuse1 = k_cpu.to("openreg") + v_privateuse1 = v_cpu.to("openreg") + assert ( + torch._fused_sdp_choice(q_privateuse1, k_privateuse1, v_privateuse1) + == SDPBackend.OVERRIDEABLE.value + ) + + def test_scaled_dot_product_fused_attention_overrideable(self): + batch_size, seq_len, num_heads, head_dim = 4, 256, 2, 128 + make_tensor = functools.partial(torch.rand, device="cpu", dtype=torch.float16) + shape = SDPAShape(batch_size, num_heads, seq_len, head_dim) + q_cpu, k_cpu, v_cpu = make_tensor(shape), make_tensor(shape), make_tensor(shape) + q_privateuse1 = q_cpu.to("openreg") + k_privateuse1 = k_cpu.to("openreg") + v_privateuse1 = v_cpu.to("openreg") + torch.nn.functional.scaled_dot_product_attention( + q_privateuse1, k_privateuse1, v_privateuse1, attn_mask=None, dropout_p=0.0 + ) + + def test_scaled_dot_product_fused_attention_overrideable_backward(self): + batch_size, seq_len, num_heads, head_dim = 4, 256, 2, 128 + make_tensor = functools.partial( + torch.rand, device="cpu", dtype=torch.float16, requires_grad=True + ) + shape = (batch_size, num_heads, seq_len, head_dim) + q_cpu, k_cpu, v_cpu = make_tensor(shape), make_tensor(shape), make_tensor(shape) + attn_mask = make_tensor((batch_size, num_heads, seq_len, seq_len)) + q_privateuse1 = q_cpu.to("openreg") + k_privateuse1 = k_cpu.to("openreg") + v_privateuse1 = v_cpu.to("openreg") + attn_mask_privateuse1 = attn_mask.to("openreg") + ( + output, + logsumexp, + cum_seq_q, + cum_seq_k, + 
max_q, + max_k, + philox_seed, + philox_offset, + debug_attn_mask, + ) = torch.ops.aten._scaled_dot_product_fused_attention_overrideable( + q_privateuse1, k_privateuse1, v_privateuse1, attn_bias=attn_mask_privateuse1 + ) + + rand_upward = torch.rand( + shape, device="cpu", dtype=torch.float16, requires_grad=False + ) + rand_upward_privateuse1 = rand_upward.to("openreg") + grad_input_mask = [True, True, True, True] + grad_q, grad_k, grad_v, grad_attn_mask = ( + torch.ops.aten._scaled_dot_product_fused_attention_overrideable_backward( + rand_upward_privateuse1, + q_privateuse1, + k_privateuse1, + v_privateuse1, + attn_mask_privateuse1, + grad_input_mask, + output, + logsumexp, + cum_seq_q, + cum_seq_k, + max_q, + max_k, + dropout_p=0.0, + is_causal=False, + philox_seed=philox_seed, + philox_offset=philox_offset, + ) + ) + + +if __name__ == "__main__": + run_tests() diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_rng.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_rng.py new file mode 100644 index 0000000000000..8a6258408d880 --- /dev/null +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_rng.py @@ -0,0 +1,23 @@ +# Owner(s): ["module: PrivateUse1"] + +import torch +from torch.testing._internal.common_utils import run_tests, TestCase + + +class TestRNG(TestCase): + def test_generator(self): + generator = torch.Generator(device="openreg:1") + self.assertEqual(generator.device.type, "openreg") + self.assertEqual(generator.device.index, 1) + + def test_rng_state(self): + state = torch.openreg.get_rng_state(0) + torch.openreg.set_rng_state(state, 0) + + def test_manual_seed(self): + torch.openreg.manual_seed_all(2024) + self.assertEqual(torch.openreg.initial_seed(), 2024) + + +if __name__ == "__main__": + run_tests() diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_storage.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_storage.py new file mode 100644 index 0000000000000..0167f2039dadc --- /dev/null +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_storage.py @@ -0,0 +1,174 @@ +# Owner(s): ["module: PrivateUse1"] + +import _codecs +import io +import os +import tempfile +import unittest + +import numpy + +import torch +from torch.serialization import safe_globals +from torch.testing._internal.common_utils import ( + run_tests, + skipIfTorchDynamo, + TemporaryFileName, + TestCase, +) + + +class TestStorage(TestCase): + @skipIfTorchDynamo("unsupported aten.is_pinned.default") + def test_rewrapped_storage(self): + pinned_a = torch.randn(10).pin_memory() + rewrapped_a = torch.tensor((), dtype=torch.float32).set_( + pinned_a.untyped_storage()[2:], + size=(5,), + stride=(1,), + storage_offset=0, + ) + self.assertTrue(rewrapped_a.is_pinned()) + self.assertNotEqual(pinned_a.data_ptr(), rewrapped_a.data_ptr()) + + +class TestSerialization(TestCase): + def test_serialization(self): + storage = torch.UntypedStorage(4, device=torch.device("openreg")) + self.assertEqual(torch.serialization.location_tag(storage), "openreg:0") + + storage = torch.UntypedStorage(4, device=torch.device("openreg:0")) + self.assertEqual(torch.serialization.location_tag(storage), "openreg:0") + + storage_cpu = torch.empty(4, 4).storage() + storage_openreg = torch.serialization.default_restore_location( + storage_cpu, "openreg:0" + ) + self.assertTrue(storage_openreg.is_openreg) + + tensor = torch.empty(3, 3, device="openreg") + 
self.assertEqual(torch._utils.get_tensor_metadata(tensor), {}) + metadata = {"version_number": True, "format_number": True} + torch._utils.set_tensor_metadata(tensor, metadata) + self.assertEqual(torch._utils.get_tensor_metadata(tensor), metadata) + + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "data.pt") + torch.save(tensor, path) + + tensor_openreg = torch.load(path) + self.assertTrue(tensor_openreg.is_openreg) + self.assertEqual(torch._utils.get_tensor_metadata(tensor_openreg), metadata) + + tensor_cpu = torch.load(path, map_location="cpu") + self.assertFalse(tensor_cpu.is_openreg) + self.assertEqual(torch._utils.get_tensor_metadata(tensor_cpu), {}) + + @skipIfTorchDynamo() + @unittest.skipIf( + numpy.__version__ < "1.25", + "versions < 1.25 serialize dtypes differently from how it's serialized in data_legacy_numpy", + ) + def test_open_device_numpy_serialization(self): + """ + This tests the legacy _rebuild_device_tensor_from_numpy serialization path + """ + data_legacy_numpy = ( + b"PK\x03\x04\x00\x00\x08\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + b"\x00\x00\x00\x10\x00\x12\x00archive/data.pklFB\x0e\x00ZZZZZZZZZZZZZZ\x80\x02}q\x00X\x01" + b"\x00\x00\x00xq\x01ctorch._utils\n_rebuild_device_tensor_from_numpy\nq\x02(cnumpy.core.m" + b"ultiarray\n_reconstruct\nq\x03cnumpy\nndarray\nq\x04K\x00\x85q\x05c_codecs\nencode\nq\x06" + b"X\x01\x00\x00\x00bq\x07X\x06\x00\x00\x00latin1q\x08\x86q\tRq\n\x87q\x0bRq\x0c(K\x01K\x02K" + b"\x03\x86q\rcnumpy\ndtype\nq\x0eX\x02\x00\x00\x00f4q\x0f\x89\x88\x87q\x10Rq\x11(K\x03X\x01" + b"\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00" + b"PK\x05\x06\x00\x00\x00\x00\x04\x00\x04\x00\x06\x01\x00\x008\x03\x00\x00\x00\x00" + ) + buf_data_legacy_numpy = io.BytesIO(data_legacy_numpy) + + with safe_globals( + [ + ( + ( + numpy.core.multiarray._reconstruct, + "numpy.core.multiarray._reconstruct", + ) + if numpy.__version__ >= "2.1" + else numpy.core.multiarray._reconstruct + ), + numpy.ndarray, + numpy.dtype, + _codecs.encode, + numpy.dtypes.Float32DType, + ] + ): + sd_loaded = torch.load(buf_data_legacy_numpy, weights_only=True) + buf_data_legacy_numpy.seek(0) + # Test map_location + sd_loaded_cpu = torch.load( + buf_data_legacy_numpy, weights_only=True, map_location="cpu" + ) + + expected = torch.tensor( + [[1, 2, 3], [4, 5, 6]], dtype=torch.float32, device="openreg" + ) + self.assertEqual(sd_loaded["x"].cpu(), expected.cpu()) + self.assertFalse(sd_loaded["x"].is_cpu) + self.assertTrue(sd_loaded_cpu["x"].is_cpu) + + def test_open_device_cpu_serialization(self): + default_protocol = torch.serialization.DEFAULT_PROTOCOL + + with unittest.mock.patch.object(torch._C, "_has_storage", return_value=False): + x = torch.randn(2, 3) + x_openreg = x.to("openreg") + sd = {"x": x_openreg} + rebuild_func = x_openreg._reduce_ex_internal(default_protocol)[0] + self.assertTrue( + rebuild_func is torch._utils._rebuild_device_tensor_from_cpu_tensor + ) + + # Test map_location + with TemporaryFileName() as f: + torch.save(sd, f) + sd_loaded = torch.load(f, weights_only=True) + # Test map_location + sd_loaded_cpu = torch.load(f, weights_only=True, map_location="cpu") + self.assertFalse(sd_loaded["x"].is_cpu) + self.assertEqual(sd_loaded["x"].cpu(), x) + self.assertTrue(sd_loaded_cpu["x"].is_cpu) + + # Test metadata_only + with TemporaryFileName() as f: + with self.assertRaisesRegex( + RuntimeError, + "Cannot serialize tensors on backends with no storage under skip_data context manager", + ): + with 
torch.serialization.skip_data():
+                    torch.save(sd, f)
+
+
+if __name__ == "__main__":
+    run_tests()
diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_streams.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_streams.py
new file mode 100644
index 0000000000000..415f438bf0834
--- /dev/null
+++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_streams.py
@@ -0,0 +1,27 @@
+# Owner(s): ["module: PrivateUse1"]
+
+import torch
+from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo, TestCase
+
+
+class TestStream(TestCase):
+    def test_stream_synchronize(self):
+        stream = torch.Stream(device="openreg:1")
+        stream.synchronize()
+        self.assertEqual(True, stream.query())
+
+    def test_stream_wait_stream(self):
+        stream_1 = torch.Stream(device="openreg:0")
+        stream_2 = torch.Stream(device="openreg:1")
+        stream_2.wait_stream(stream_1)
+
+    @skipIfTorchDynamo()
+    def test_stream_wait_event(self):
+        s1 = torch.Stream(device="openreg")
+        s2 = torch.Stream(device="openreg")
+        e = s1.record_event()
+        s2.wait_event(e)
+
+
+if __name__ == "__main__":
+    run_tests()
diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_utils.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_utils.py
new file mode 100644
index 0000000000000..e6a6093e43340
--- /dev/null
+++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_utils.py
@@ -0,0 +1,20 @@
+# Owner(s): ["module: PrivateUse1"]
+
+import torch
+from torch.testing._internal.common_utils import run_tests, TestCase
+
+
+class TestDLPack(TestCase):
+    def test_open_device_dlpack(self):
+        x_in = torch.randn(2, 3).to("openreg")
+        capsule = torch.utils.dlpack.to_dlpack(x_in)
+        x_out = torch.from_dlpack(capsule)
+        self.assertTrue(x_out.device == x_in.device)
+
+        x_in = x_in.to("cpu")
+        x_out = x_out.to("cpu")
+        self.assertEqual(x_in, x_out)
+
+
+if __name__ == "__main__":
+    run_tests()
diff --git a/test/run_test.py b/test/run_test.py
index 7276baecabf83..fd809bd13ecac 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -920,7 +920,12 @@ def _test_autoload(test_directory, options, enable=True):
         os.environ.pop("TORCH_DEVICE_BACKEND_AUTOLOAD")


-def run_test_with_openreg(test_module, test_directory, options):
+# test_openreg is designed to run all tests under torch_openreg, which
+# is a torch backend similar to CUDA or MPS, implemented via the
+# third-party accelerator integration mechanism. Therefore, if all the
+# tests under torch_openreg pass, it means that the mechanism
+# mentioned above is working as expected.
+def test_openreg(test_module, test_directory, options): openreg_dir = os.path.join( test_directory, "cpp_extensions", "open_registration_extension", "torch_openreg" ) @@ -929,7 +934,16 @@ def run_test_with_openreg(test_module, test_directory, options): return return_code with extend_python_path([install_dir]): - return run_test(test_module, test_directory, options) + cmd = [ + sys.executable, + "-m", + "unittest", + "discover", + "-s", + os.path.join(openreg_dir, "tests"), + "-v", + ] + return shell(cmd, cwd=test_directory, env=os.environ) def test_distributed(test_module, test_directory, options): @@ -1258,8 +1272,7 @@ def run_ci_sanity_check(test: ShardedTest, test_directory, options): "test_ci_sanity_check_fail": run_ci_sanity_check, "test_autoload_enable": test_autoload_enable, "test_autoload_disable": test_autoload_disable, - "test_openreg": run_test_with_openreg, - "test_transformers_privateuse1": run_test_with_openreg, + "test_openreg": test_openreg, } diff --git a/test/test_openreg.py b/test/test_openreg.py deleted file mode 100644 index c0d99f5a6ac1a..0000000000000 --- a/test/test_openreg.py +++ /dev/null @@ -1,629 +0,0 @@ -# Owner(s): ["module: PrivateUse1"] - -import _codecs -import io -import os -import tempfile -import types -import unittest -from unittest.mock import patch - -import numpy as np -import psutil - -import torch -from torch.serialization import safe_globals -from torch.testing._internal.common_utils import ( - run_tests, - skipIfMPS, - skipIfTorchDynamo, - skipIfWindows, - skipIfXpu, - TemporaryFileName, - TestCase, -) - - -class TestPrivateUse1(TestCase): - """Tests of third-parth device integration mechinasm based PrivateUse1""" - - def test_backend_name(self): - self.assertEqual(torch._C._get_privateuse1_backend_name(), "openreg") - # backend can be renamed to the same name multiple times - torch.utils.rename_privateuse1_backend("openreg") - with self.assertRaisesRegex(RuntimeError, "has already been set"): # type: ignore[misc] - torch.utils.rename_privateuse1_backend("dev") - - def test_backend_module_registration(self): - def generate_faked_module(): - return types.ModuleType("fake_module") - - with self.assertRaisesRegex(RuntimeError, "Expected one of cpu"): # type: ignore[misc] - torch._register_device_module("dev", generate_faked_module()) - with self.assertRaisesRegex(RuntimeError, "The runtime module of"): # type: ignore[misc] - torch._register_device_module("openreg", generate_faked_module()) - - def test_backend_generate_methods(self): - with self.assertRaisesRegex(RuntimeError, "The custom device module of"): # type: ignore[misc] - torch.utils.generate_methods_for_privateuse1_backend() # type: ignore[misc] - - self.assertTrue(hasattr(torch.Tensor, "is_openreg")) - self.assertTrue(hasattr(torch.Tensor, "openreg")) - self.assertTrue(hasattr(torch.TypedStorage, "is_openreg")) - self.assertTrue(hasattr(torch.TypedStorage, "openreg")) - self.assertTrue(hasattr(torch.UntypedStorage, "is_openreg")) - self.assertTrue(hasattr(torch.UntypedStorage, "openreg")) - self.assertTrue(hasattr(torch.nn.Module, "openreg")) - self.assertTrue(hasattr(torch.nn.utils.rnn.PackedSequence, "is_openreg")) - self.assertTrue(hasattr(torch.nn.utils.rnn.PackedSequence, "openreg")) - - def test_backend_module_function(self): - with self.assertRaisesRegex(RuntimeError, "Try to call torch.openreg"): # type: ignore[misc] - torch.utils.backend_registration._get_custom_mod_func("func_name_") # type: ignore[misc] - self.assertTrue( - 
torch.utils.backend_registration._get_custom_mod_func("device_count")() == 2 # type: ignore[misc] - ) - - @skipIfTorchDynamo() - def test_backend_operator_registration(self): - self.assertTrue( - torch._C._dispatch_has_kernel_for_dispatch_key( - "aten::empty.memory_format", torch.DispatchKey.PrivateUse1 - ) - ) - x = torch.empty(3, 3, device="openreg") - self.assertTrue(x.device.type, "openreg") - self.assertTrue(x.shape, torch.Size([3, 3])) - - def test_backend_dispatchstub(self): - x_cpu = torch.randn(2, 2, 3, dtype=torch.float32, device="cpu") - x_openreg = x_cpu.to("openreg") - - y_cpu = torch.abs(x_cpu) - y_openreg = torch.abs(x_openreg) - self.assertEqual(y_cpu, y_openreg.cpu()) - - o_cpu = torch.randn(2, 2, 6, dtype=torch.float32, device="cpu") - o_openreg = o_cpu.to("openreg") - # output operand with resize flag is False in TensorIterator. - torch.abs(x_cpu, out=o_cpu[:, :, 0:6:2]) - torch.abs(x_openreg, out=o_openreg[:, :, 0:6:2]) - self.assertEqual(o_cpu, o_openreg.cpu()) - - # output operand with resize flag is True in TensorIterator and - # convert output to contiguous tensor in TensorIterator. - torch.abs(x_cpu, out=o_cpu[:, :, 0:6:3]) - torch.abs(x_openreg, out=o_openreg[:, :, 0:6:3]) - self.assertEqual(o_cpu, o_openreg.cpu()) - - def test_backend_tensor_type(self): - dtypes_map = { - torch.bool: "torch.openreg.BoolTensor", - torch.double: "torch.openreg.DoubleTensor", - torch.float32: "torch.openreg.FloatTensor", - torch.half: "torch.openreg.HalfTensor", - torch.int32: "torch.openreg.IntTensor", - torch.int64: "torch.openreg.LongTensor", - torch.int8: "torch.openreg.CharTensor", - torch.short: "torch.openreg.ShortTensor", - torch.uint8: "torch.openreg.ByteTensor", - } - - for dtype, str in dtypes_map.items(): - x = torch.empty(4, 4, dtype=dtype, device="openreg") - self.assertTrue(x.type() == str) - - # Note that all dtype-d Tensor objects here are only for legacy reasons - # and should NOT be used. - def test_backend_type_methods(self): - # Tensor - tensor_cpu = torch.randn([8]).float() - self.assertEqual(tensor_cpu.type(), "torch.FloatTensor") - - tensor_openreg = tensor_cpu.openreg() - self.assertEqual(tensor_openreg.type(), "torch.openreg.FloatTensor") - - # Storage - storage_cpu = tensor_cpu.storage() - self.assertEqual(storage_cpu.type(), "torch.FloatStorage") - - tensor_openreg = tensor_cpu.openreg() - storage_openreg = tensor_openreg.storage() - self.assertEqual(storage_openreg.type(), "torch.storage.TypedStorage") - - class CustomFloatStorage: - @property - def __module__(self): - return "torch." 
+ torch._C._get_privateuse1_backend_name() - - @property - def __name__(self): - return "FloatStorage" - - try: - torch.openreg.FloatStorage = CustomFloatStorage() - self.assertEqual(storage_openreg.type(), "torch.openreg.FloatStorage") - - # test custom int storage after defining FloatStorage - tensor_openreg = tensor_cpu.int().openreg() - storage_openreg = tensor_openreg.storage() - self.assertEqual(storage_openreg.type(), "torch.storage.TypedStorage") - finally: - torch.openreg.FloatStorage = None - - def test_backend_tensor_methods(self): - x = torch.empty(4, 4) - self.assertFalse(x.is_openreg) # type: ignore[misc] - - y = x.openreg(torch.device("openreg")) # type: ignore[misc] - self.assertTrue(y.is_openreg) # type: ignore[misc] - z = x.openreg(torch.device("openreg:0")) # type: ignore[misc] - self.assertTrue(z.is_openreg) # type: ignore[misc] - n = x.openreg(0) # type: ignore[misc] - self.assertTrue(n.is_openreg) # type: ignore[misc] - - @unittest.skip("Need to support Parameter in openreg") - def test_backend_module_methods(self): - class FakeModule(torch.nn.Module): - def __init__(self): - super().__init__() - self.x = torch.nn.Parameter(torch.randn(3, 3)) - - def forward(self): - pass - - module = FakeModule() - self.assertEqual(module.x.device.type, "cpu") - module.openreg() # type: ignore[misc] - self.assertEqual(module.x.device.type, "openreg") - - @unittest.skip("Need to support untyped_storage in openreg") - def test_backend_storage_methods(self): - x = torch.empty(4, 4) - - x_cpu = x.storage() - self.assertFalse(x_cpu.is_openreg) # type: ignore[misc] - x_openreg = x_cpu.openreg() # type: ignore[misc] - self.assertTrue(x_openreg.is_openreg) # type: ignore[misc] - - y = torch.empty(4, 4) - - y_cpu = y.untyped_storage() - self.assertFalse(y_cpu.is_openreg) # type: ignore[misc] - y_openreg = y_cpu.openreg() # type: ignore[misc] - self.assertTrue(y_openreg.is_openreg) # type: ignore[misc] - - def test_backend_packed_sequence_methods(self): - x = torch.rand(5, 3) - y = torch.tensor([1, 1, 1, 1, 1]) - - z_cpu = torch.nn.utils.rnn.PackedSequence(x, y) - self.assertFalse(z_cpu.is_openreg) # type: ignore[misc] - - z_openreg = z_cpu.openreg() # type: ignore[misc] - self.assertTrue(z_openreg.is_openreg) # type: ignore[misc] - - -class TestOpenReg(TestCase): - """Tests of mimic accelerator named OpenReg based on PrivateUse1""" - - # Stream & Event - def test_stream_synchronize(self): - stream = torch.Stream(device="openreg:1") - stream.synchronize() - self.assertEqual(True, stream.query()) - - def test_stream_wait_stream(self): - stream_1 = torch.Stream(device="openreg:0") - stream_2 = torch.Stream(device="openreg:1") - # Does not crash! 
- stream_2.wait_stream(stream_1) - - @skipIfTorchDynamo() - def test_record_event(self): - stream = torch.Stream(device="openreg:1") - event1 = stream.record_event() - self.assertNotEqual(0, event1.event_id) - event2 = stream.record_event() - self.assertNotEqual(0, event2.event_id) - self.assertNotEqual(event1.event_id, event2.event_id) - - @skipIfTorchDynamo() - def test_event_elapsed_time(self): - stream = torch.Stream(device="openreg:1") - e1 = torch.Event(device="openreg:1", enable_timing=True) - e1.record(stream) - e2 = torch.Event(device="openreg:1", enable_timing=True) - e2.record(stream) - - e2.synchronize() - self.assertTrue(e2.query()) - - ms = e1.elapsed_time(e2) - self.assertTrue(ms > 0) - - @skipIfTorchDynamo() - def test_stream_wait_event(self): - s1 = torch.Stream(device="openreg") - s2 = torch.Stream(device="openreg") - e = s1.record_event() - s2.wait_event(e) - - @skipIfTorchDynamo() - def test_event_wait_stream(self): - s1 = torch.Stream(device="openreg") - s2 = torch.Stream(device="openreg") - e1 = s1.record_event() - e1.wait(s2) - - # Copy - def test_cross_device_copy(self): - a = torch.rand(10) - b = a.to(device="openreg").add(2).to(device="cpu") - self.assertEqual(b, a + 2) - - def test_copy_same_device(self): - a = torch.ones(10, device="openreg").clone() - self.assertEqual(a, torch.ones(10, device="openreg")) - - def test_cross_diff_devices_copy(self): - a = torch.ones(10, device="openreg:0").to(device="openreg:1").to(device="cpu") - self.assertEqual(a, torch.ones(10)) - - # RNG - def test_generator(self): - generator = torch.Generator(device="openreg:1") - self.assertEqual(generator.device.type, "openreg") - self.assertEqual(generator.device.index, 1) - - def test_rng_state(self): - state = torch.openreg.get_rng_state(0) # type: ignore[misc] - torch.openreg.set_rng_state(state, 0) # type: ignore[misc] - - def test_manual_seed(self): - torch.openreg.manual_seed_all(2024) # type: ignore[misc] - self.assertEqual(torch.openreg.initial_seed(), 2024) # type: ignore[misc] - - # Autograd - @skipIfMPS - @skipIfWindows() - def test_autograd_init(self): - # Make sure autograd is initialized - torch.ones(2, requires_grad=True, device="openreg").sum().backward() - - pid = os.getpid() - task_path = f"/proc/{pid}/task" - all_threads = psutil.Process(pid).threads() - - all_thread_names = set() - - for t in all_threads: - with open(f"{task_path}/{t.id}/comm") as file: - thread_name = file.read().strip() - all_thread_names.add(thread_name) - - for i in range(torch.accelerator.device_count()): - self.assertIn(f"pt_autograd_{i}", all_thread_names) - - # Storage & Pin Memory - @skipIfTorchDynamo("unsupported aten.is_pinned.default") - def test_pin_memory(self): - tensor = torch.randn(10) - self.assertFalse(tensor.is_pinned()) - pinned_tensor = tensor.pin_memory() - self.assertTrue(pinned_tensor.is_pinned()) - slice_tensor = pinned_tensor[2:5] - self.assertTrue(slice_tensor.is_pinned()) - - tensor = torch.randn(10) - storage = tensor.storage() - self.assertFalse(storage.is_pinned("openreg")) - pinned_storage = storage.pin_memory("openreg") - self.assertTrue(pinned_storage.is_pinned("openreg")) - - tensor = torch.randn(10) - untyped_storage = tensor.untyped_storage() - self.assertFalse(untyped_storage.is_pinned("openreg")) - pinned_untyped_storage = untyped_storage.pin_memory("openreg") - self.assertTrue(pinned_untyped_storage.is_pinned("openreg")) - - @skipIfTorchDynamo("unsupported aten.is_pinned.default") - def test_rewrapped_storage(self): - pinned_a = torch.randn(10).pin_memory() - 
rewrapped_a = torch.tensor((), dtype=torch.float32).set_( - pinned_a.untyped_storage()[2:], - size=(5,), - stride=(1,), - storage_offset=0, - ) - self.assertTrue(rewrapped_a.is_pinned()) - self.assertNotEqual(pinned_a.data_ptr(), rewrapped_a.data_ptr()) - - # Serialization - def test_serialization(self): - storage = torch.UntypedStorage(4, device=torch.device("openreg")) - self.assertEqual(torch.serialization.location_tag(storage), "openreg:0") - - storage = torch.UntypedStorage(4, device=torch.device("openreg:0")) - self.assertEqual(torch.serialization.location_tag(storage), "openreg:0") - - storage_cpu = torch.empty(4, 4).storage() - storage_openreg = torch.serialization.default_restore_location( - storage_cpu, "openreg:0" - ) - self.assertTrue(storage_openreg.is_openreg) # type: ignore[misc] - - tensor = torch.empty(3, 3, device="openreg") - self.assertEqual(torch._utils.get_tensor_metadata(tensor), {}) # type: ignore[misc] - metadata = {"version_number": True, "format_number": True} - torch._utils.set_tensor_metadata(tensor, metadata) # type: ignore[misc] - self.assertEqual(torch._utils.get_tensor_metadata(tensor), metadata) # type: ignore[misc] - - with tempfile.TemporaryDirectory() as tmpdir: - path = os.path.join(tmpdir, "data.pt") - torch.save(tensor, path) - - tensor_openreg = torch.load(path) - self.assertTrue(tensor_openreg.is_openreg) - self.assertEqual(torch._utils.get_tensor_metadata(tensor_openreg), metadata) # type: ignore[misc] - - tensor_cpu = torch.load(path, map_location="cpu") - self.assertFalse(tensor_cpu.is_openreg) - self.assertEqual(torch._utils.get_tensor_metadata(tensor_cpu), {}) # type: ignore[misc] - - @skipIfTorchDynamo() - @unittest.skipIf( - np.__version__ < "1.25", - "versions < 1.25 serialize dtypes differently from how it's serialized in data_legacy_numpy", - ) - def test_open_device_numpy_serialization(self): - """ - This tests the legacy _rebuild_device_tensor_from_numpy serialization path - """ - data_legacy_numpy = ( - b"PK\x03\x04\x00\x00\x08\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" - b"\x00\x00\x00\x10\x00\x12\x00archive/data.pklFB\x0e\x00ZZZZZZZZZZZZZZ\x80\x02}q\x00X\x01" - b"\x00\x00\x00xq\x01ctorch._utils\n_rebuild_device_tensor_from_numpy\nq\x02(cnumpy.core.m" - b"ultiarray\n_reconstruct\nq\x03cnumpy\nndarray\nq\x04K\x00\x85q\x05c_codecs\nencode\nq\x06" - b"X\x01\x00\x00\x00bq\x07X\x06\x00\x00\x00latin1q\x08\x86q\tRq\n\x87q\x0bRq\x0c(K\x01K\x02K" - b"\x03\x86q\rcnumpy\ndtype\nq\x0eX\x02\x00\x00\x00f4q\x0f\x89\x88\x87q\x10Rq\x11(K\x03X\x01" - b"\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00" - b"PK\x05\x06\x00\x00\x00\x00\x04\x00\x04\x00\x06\x01\x00\x008\x03\x00\x00\x00\x00" - ) - buf_data_legacy_numpy = io.BytesIO(data_legacy_numpy) - - with safe_globals( - [ - ( - ( - np.core.multiarray._reconstruct, - "numpy.core.multiarray._reconstruct", - ) - if np.__version__ >= "2.1" - else np.core.multiarray._reconstruct - ), - np.ndarray, - np.dtype, - _codecs.encode, - np.dtypes.Float32DType, - ] - ): - sd_loaded = torch.load(buf_data_legacy_numpy, weights_only=True) - buf_data_legacy_numpy.seek(0) - # Test map_location - sd_loaded_cpu = torch.load( - buf_data_legacy_numpy, weights_only=True, map_location="cpu" - ) - - expected = torch.tensor( - [[1, 2, 3], [4, 5, 6]], dtype=torch.float32, device="openreg" - ) - self.assertEqual(sd_loaded["x"].cpu(), expected.cpu()) - self.assertFalse(sd_loaded["x"].is_cpu) - self.assertTrue(sd_loaded_cpu["x"].is_cpu) - - def test_open_device_cpu_serialization(self): - default_protocol 
= torch.serialization.DEFAULT_PROTOCOL - - with patch.object(torch._C, "_has_storage", return_value=False): - x = torch.randn(2, 3) - x_openreg = x.to("openreg") - sd = {"x": x_openreg} - rebuild_func = x_openreg._reduce_ex_internal(default_protocol)[0] - self.assertTrue( - rebuild_func is torch._utils._rebuild_device_tensor_from_cpu_tensor - ) - - # Test map_location - with TemporaryFileName() as f: - torch.save(sd, f) - sd_loaded = torch.load(f, weights_only=True) - # Test map_location - sd_loaded_cpu = torch.load(f, weights_only=True, map_location="cpu") - self.assertFalse(sd_loaded["x"].is_cpu) - self.assertEqual(sd_loaded["x"].cpu(), x) - self.assertTrue(sd_loaded_cpu["x"].is_cpu) - - # Test metadata_only - with TemporaryFileName() as f: - with self.assertRaisesRegex( - RuntimeError, - "Cannot serialize tensors on backends with no storage under skip_data context manager", - ): - with torch.serialization.skip_data(): - torch.save(sd, f) - - # Operators - def test_factory(self): - x = torch.empty(3, device="openreg") - self.assertEqual(x.device.type, "openreg") - self.assertEqual(x.shape, torch.Size([3])) - - y = torch.zeros(3, device="openreg") - self.assertEqual(y.device.type, "openreg") - self.assertEqual(y.shape, torch.Size([3])) - - z = torch.tensor((), device="openreg") - self.assertEqual(z.device.type, "openreg") - self.assertEqual(z.shape, torch.Size([0])) - - def test_fake_tensor(self): - with torch._subclasses.fake_tensor.FakeTensorMode(): - a = torch.empty(1, device="openreg") - b = torch.empty(1, device="openreg:0") - result = a + b # noqa: F841 - - def test_named_tensor(self): - return torch.empty([2, 3, 4, 5], device="openreg", names=["N", "C", "H", "W"]) - - def test_printing(self): - a = torch.ones(20, device="openreg") - # Does not crash! - str(a) - - def test_data_dependent_output(self): - cpu_a = torch.randn(10) - a = cpu_a.to(device="openreg") - mask = a.gt(0) - out = torch.masked_select(a, mask) - - self.assertEqual(out, cpu_a.masked_select(cpu_a.gt(0))) - - def test_expand(self): - x = torch.tensor([[1], [2], [3]], device="openreg") - y = x.expand(3, 2) - self.assertEqual(y.to(device="cpu"), torch.tensor([[1, 1], [2, 2], [3, 3]])) - self.assertEqual(x.data_ptr(), y.data_ptr()) - - def test_resize(self): - tensor_cpu = torch.randn([4, 4]) - - tensor_openreg = tensor_cpu.openreg() - self.assertTrue(tensor_openreg.size() == torch.Size([4, 4])) - - storage_openreg = tensor_openreg.storage() - self.assertTrue(storage_openreg.size() == 16) - - tensor_openreg.resize_(2, 2, 2, 2) - self.assertTrue(tensor_openreg.size() == torch.Size([2, 2, 2, 2])) - - storage_openreg = tensor_openreg.storage() - self.assertTrue(storage_openreg.size() == 16) - - # Quantize - @skipIfXpu(msg="missing kernel for openreg") - def test_quantize(self): - x = torch.randn(3, 4, 5, dtype=torch.float32, device="openreg") - quantized_tensor = torch.quantize_per_tensor(x, 0.1, 10, torch.qint8) - self.assertEqual(quantized_tensor.device, torch.device("openreg:0")) - self.assertEqual(quantized_tensor.dtype, torch.qint8) - - # custom autograd - def test_compile_autograd_function_returns_self(self): - in_ref = torch.randn(4, device="openreg", requires_grad=True) - out_ref = torch.ops.openreg.custom_autograd_fn_returns_self(in_ref) - out_ref.sum().backward() - - in_test = in_ref.detach().clone().requires_grad_(True) - # TODO(FFFrog): Need to support inductor for OpenReg first. 
- out_test = torch.compile(backend="aot_eager")( - torch.ops.openreg.custom_autograd_fn_returns_self - )(in_test) - out_test.sum().backward() - - self.assertEqual(out_ref, out_test) - self.assertEqual(in_ref.grad, in_test.grad) - - @skipIfTorchDynamo("Temporary disabled due to torch._ops.OpOverloadPacket") - def test_compile_autograd_function_aliasing(self): - in_ref = torch.randn(4, device="openreg", requires_grad=True) - out_ref = torch.ops.openreg.custom_autograd_fn_aliasing(in_ref) - out_ref.sum().backward() - - in_test = in_ref.detach().clone().requires_grad_(True) - # TODO(FFFrog): Need to support inductor for OpenReg first. - out_test = torch.compile(backend="aot_eager")( - torch.ops.openreg.custom_autograd_fn_aliasing - )(in_test) - out_test.sum().backward() - - self.assertEqual(out_ref, out_test) - self.assertEqual(in_ref.grad, in_test.grad) - - def test_open_device_dlpack(self): - x_in = torch.randn(2, 3).to("openreg") - capsule = torch.utils.dlpack.to_dlpack(x_in) - x_out = torch.from_dlpack(capsule) - self.assertTrue(x_out.device == x_in.device) - - x_in = x_in.to("cpu") - x_out = x_out.to("cpu") - self.assertEqual(x_in, x_out) - - # fallback - def test_scalar_type_fallback(self): - x_cpu = torch.Tensor([[0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2]]).to(torch.int64) - x = torch.triu_indices(3, 3, device="openreg") - self.assertEqual(x_cpu, x) - - def test_tensor_type_fallback(self): - x = torch.Tensor([[1, 2, 3], [2, 3, 4]]).to("openreg") - y = torch.Tensor([1, 0, 2]).to("openreg") - self.assertTrue(x.device.type, "openreg") - self.assertFalse(x.is_cpu) - - z_cpu = torch.Tensor([[0, 2, 1], [1, 3, 2]]) - # call sub op, which will fallback to cpu - z = torch.sub(x, y) - self.assertEqual(z_cpu, z) - - # call index op, which will fallback to cpu - z_cpu = torch.Tensor([3, 1]) - y = torch.Tensor([1, 0]).long().to("openreg") - z = x[y, y] - self.assertEqual(z_cpu, z) - - def test_tensorlist_type_fallback(self): - # create tensors located in custom device - v_openreg = torch.Tensor([1, 2, 3]).to("openreg") - # create result tensor located in cpu - z_cpu = torch.Tensor([2, 4, 6]) - # create tensorlist for foreach_add op - x = (v_openreg, v_openreg) - y = (v_openreg, v_openreg) - - # Check that our device is correct. 
- self.assertTrue(v_openreg.device.type == "openreg") - self.assertFalse(v_openreg.is_cpu) - - # call _foreach_add op, which will fallback to cpu - z = torch._foreach_add(x, y) - self.assertEqual(z_cpu, z[0]) - self.assertEqual(z_cpu, z[1]) - - -if __name__ == "__main__": - run_tests() diff --git a/test/test_transformers_privateuse1.py b/test/test_transformers_privateuse1.py deleted file mode 100644 index 31023875f886d..0000000000000 --- a/test/test_transformers_privateuse1.py +++ /dev/null @@ -1,98 +0,0 @@ -# Owner(s): ["module: sdpa"] - -import unittest -from collections import namedtuple -from functools import partial - -import torch -from torch.nn.attention import SDPBackend -from torch.testing._internal.common_nn import NNTestCase -from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo, TEST_XPU - - -SdpaShape = namedtuple("Sdpa_Shape", ["batch", "num_heads", "seq_len", "head_dim"]) - - -@unittest.skipIf(TEST_XPU, "XPU does not support cppextension currently") -class TestSDPAPrivateUse1Only(NNTestCase): - @skipIfTorchDynamo() - def test_fused_sdp_choice_privateuseone(self): - batch_size, seq_len, num_heads, head_dim = 4, 256, 2, 128 - make_tensor = partial(torch.rand, device="cpu", dtype=torch.float16) - shape = SdpaShape(batch_size, num_heads, seq_len, head_dim) - q_cpu, k_cpu, v_cpu = make_tensor(shape), make_tensor(shape), make_tensor(shape) - q_privateuse1 = q_cpu.to("openreg") - k_privateuse1 = k_cpu.to("openreg") - v_privateuse1 = v_cpu.to("openreg") - assert ( - torch._fused_sdp_choice(q_privateuse1, k_privateuse1, v_privateuse1) - == SDPBackend.OVERRIDEABLE.value - ) - - def test_scaled_dot_product_fused_attention_overrideable(self): - batch_size, seq_len, num_heads, head_dim = 4, 256, 2, 128 - make_tensor = partial(torch.rand, device="cpu", dtype=torch.float16) - shape = SdpaShape(batch_size, num_heads, seq_len, head_dim) - q_cpu, k_cpu, v_cpu = make_tensor(shape), make_tensor(shape), make_tensor(shape) - q_privateuse1 = q_cpu.to("openreg") - k_privateuse1 = k_cpu.to("openreg") - v_privateuse1 = v_cpu.to("openreg") - torch.nn.functional.scaled_dot_product_attention( - q_privateuse1, k_privateuse1, v_privateuse1, attn_mask=None, dropout_p=0.0 - ) - - def test_scaled_dot_product_fused_attention_overrideable_backward(self): - batch_size, seq_len, num_heads, head_dim = 4, 256, 2, 128 - make_tensor = partial( - torch.rand, device="cpu", dtype=torch.float16, requires_grad=True - ) - shape = (batch_size, num_heads, seq_len, head_dim) - q_cpu, k_cpu, v_cpu = make_tensor(shape), make_tensor(shape), make_tensor(shape) - attn_mask = make_tensor((batch_size, num_heads, seq_len, seq_len)) - q_privateuse1 = q_cpu.to("openreg") - k_privateuse1 = k_cpu.to("openreg") - v_privateuse1 = v_cpu.to("openreg") - attn_mask_privateuse1 = attn_mask.to("openreg") - ( - output, - logsumexp, - cum_seq_q, - cum_seq_k, - max_q, - max_k, - philox_seed, - philox_offset, - debug_attn_mask, - ) = torch.ops.aten._scaled_dot_product_fused_attention_overrideable( - q_privateuse1, k_privateuse1, v_privateuse1, attn_bias=attn_mask_privateuse1 - ) - - rand_upward = torch.rand( - shape, device="cpu", dtype=torch.float16, requires_grad=False - ) - rand_upward_privateuse1 = rand_upward.to("openreg") - grad_input_mask = [True, True, True, True] - grad_q, grad_k, grad_v, grad_attn_mask = ( - torch.ops.aten._scaled_dot_product_fused_attention_overrideable_backward( - rand_upward_privateuse1, - q_privateuse1, - k_privateuse1, - v_privateuse1, - attn_mask_privateuse1, - grad_input_mask, - output, - 
logsumexp, - cum_seq_q, - cum_seq_k, - max_q, - max_k, - dropout_p=0.0, - is_causal=False, - philox_seed=philox_seed, - philox_offset=philox_offset, - ) - ) - - -if __name__ == "__main__": - run_tests() diff --git a/tools/testing/discover_tests.py b/tools/testing/discover_tests.py index 96aee230f89f8..25fcf07de9373 100644 --- a/tools/testing/discover_tests.py +++ b/tools/testing/discover_tests.py @@ -83,6 +83,7 @@ def skip_test_p(name: str) -> bool: "package", # executed by test_package.py "quantization", # executed by test_quantization.py "autograd", # executed by test_autograd.py + "cpp_extensions/open_registration_extension/torch_openreg/tests", # executed by test_openreg.py ], blocklisted_tests=[ "test_bundled_images", @@ -138,6 +139,7 @@ def skip_test_p(name: str) -> bool: "doctests", "test_autoload_enable", "test_autoload_disable", + "test_openreg", ], ) From 27daa6af6aad0f93f98ea5fa4bfdbb70b30a1eab Mon Sep 17 00:00:00 2001 From: FFFrog Date: Sat, 13 Sep 2025 02:06:25 +0800 Subject: [PATCH 198/693] [OpenReg] Strengthen Openreg's execution limits to minimize the waste of computing resources (#161918) Currently, OpenReg supports Linux, Windows, and OS X, ensuring stability and ease of integration with third-party devices across all three platforms. It also doesn't rely on any other accelerators (such as CUDA or MPS). Therefore, to minimize computational resource usage, `test_openreg` can be added to certain BLOCKLISTS to prevent its execution, limiting OpenReg's execution to only necessary scenarios. Pull Request resolved: https://github.com/pytorch/pytorch/pull/161918 Approved by: https://github.com/albanD ghstack dependencies: #161917 --- .../torch_openreg/tests/test_ops.py | 11 +---------- test/run_test.py | 2 ++ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_ops.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_ops.py index 903a946ea32eb..a307f65cbc6fa 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_ops.py +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_ops.py @@ -2,18 +2,11 @@ import collections import functools -import unittest import torch from torch.nn.attention import SDPBackend from torch.testing._internal.common_nn import NNTestCase -from torch.testing._internal.common_utils import ( - run_tests, - skipIfTorchDynamo, - skipIfXpu, - TEST_XPU, - TestCase, -) +from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo, TestCase SDPAShape = collections.namedtuple( @@ -123,7 +116,6 @@ def test_backend_dispatchstub(self): class TestQuantization(TestCase): - @skipIfXpu(msg="missing kernel for openreg") def test_quantize(self): x = torch.randn(3, 4, 5, dtype=torch.float32, device="openreg") quantized_tensor = torch.quantize_per_tensor(x, 0.1, 10, torch.qint8) @@ -206,7 +198,6 @@ def test_tensorlist_type_fallback(self): self.assertEqual(z_cpu, z[1]) -@unittest.skipIf(TEST_XPU, "XPU does not support cppextension currently") class TestSDPA(NNTestCase): @skipIfTorchDynamo() def test_fused_sdp_choice_privateuseone(self): diff --git a/test/run_test.py b/test/run_test.py index fd809bd13ecac..9414a340257c0 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -177,6 +177,7 @@ def __contains__(self, item): "test_jit_legacy", "test_cuda_nvml_based_avail", "test_jit_cuda_fuser", + "test_openreg", ] S390X_BLOCKLIST = [ @@ -242,6 +243,7 @@ def __contains__(self, item): # depend on z3-solver 
"fx/test_z3_gradual_types", "test_proxy_tensor", + "test_openreg", ] XPU_BLOCKLIST = [ From 29f84b0f617013e091fc7a0575684ddf46c541d4 Mon Sep 17 00:00:00 2001 From: FFFrog Date: Sat, 13 Sep 2025 02:06:25 +0800 Subject: [PATCH 199/693] [OpenReg] Improve the Event and Stream capabilities of DeviceGuardImplInterface (#160101) **Changes:** - Based on `OpenRegStream` and `OpenRegEvent`, we improve the implementation of Device Guard for `OpenReg` - Add some related testcases Pull Request resolved: https://github.com/pytorch/pytorch/pull/160101 Approved by: https://github.com/albanD ghstack dependencies: #161917, #161918 --- .../csrc/runtime/OpenRegFunctions.cpp | 2 +- .../torch_openreg/csrc/runtime/OpenRegGuard.h | 183 +++++++++++++----- .../torch_openreg/csrc/runtime/OpenRegHooks.h | 2 + .../csrc/runtime/OpenRegHostAllocator.h | 2 + .../csrc/runtime/OpenRegSerialization.h | 2 + .../torch_openreg/tests/test_device.py | 32 +++ .../torch_openreg/tests/test_event.py | 62 ++++-- .../torch_openreg/tests/test_streams.py | 45 +++++ 8 files changed, 271 insertions(+), 59 deletions(-) create mode 100644 test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_device.py diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegFunctions.cpp b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegFunctions.cpp index 566bacd06e9ad..ac39453a7f4d1 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegFunctions.cpp +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegFunctions.cpp @@ -36,7 +36,7 @@ OPENREG_EXPORT c10::DeviceIndex device_count() noexcept { static int count = []() { try { auto result = device_count_impl(); - TORCH_INTERNAL_ASSERT( + TORCH_CHECK( result <= std::numeric_limits::max(), "Too many devices, DeviceIndex overflowed"); return result; diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegGuard.h b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegGuard.h index f0150fe680fb8..ad89b7a208cb4 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegGuard.h +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegGuard.h @@ -1,19 +1,26 @@ +#pragma once + #include #include #include +#include "OpenRegEvent.h" #include "OpenRegFunctions.h" +#include "OpenRegStream.h" namespace c10::openreg { -// Device guard registration struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { - static constexpr c10::DeviceType static_type = c10::DeviceType::PrivateUse1; + static constexpr DeviceType static_type = c10::DeviceType::PrivateUse1; OpenRegGuardImpl() = default; + explicit OpenRegGuardImpl(c10::DeviceType t) { - TORCH_INTERNAL_ASSERT(t == static_type); + TORCH_CHECK( + t == static_type, + "OpenRegGuardImpl initialized with non-PrivateUse1 DeviceType: ", + t); } /** @@ -27,7 +34,8 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { * Set the current device to Device, and return the previous c10::Device. 
*/ c10::Device exchangeDevice(c10::Device d) const override { - TORCH_CHECK(d.is_privateuseone()); + TORCH_CHECK( + d.is_privateuseone(), "Excepted a PrivateUse1 device, but got ", d); auto old_device_index = ExchangeDevice(d.index()); return c10::Device(static_type, old_device_index); @@ -45,7 +53,8 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { * Set the current device to c10::Device. */ void setDevice(c10::Device d) const override { - TORCH_CHECK(d.is_privateuseone()); + TORCH_CHECK( + d.is_privateuseone(), "Excepted a PrivateUse1 device, but got ", d); set_device(d.index()); } @@ -55,8 +64,6 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { * (so, e.g., this can be called from a destructor). */ void uncheckedSetDevice(c10::Device d) const noexcept override { - TORCH_CHECK(d.is_privateuseone()); - set_device(d.index()); } @@ -64,32 +71,31 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { * Get the current stream for a given device. */ c10::Stream getStream(c10::Device d) const noexcept override { - return c10::Stream(c10::Stream::DEFAULT, d); + return getCurrentOpenRegStream(d.index()).unwrap(); } /** * Get the default stream for a given device. */ c10::Stream getDefaultStream(c10::Device d) const override { - return c10::Stream(c10::Stream::DEFAULT, d); + return getDefaultOpenRegStream(d.index()); } /** - * Get a stream from the global pool for a given device. + * Return a new stream for a given device and priority. The stream will be + * copied and shared around, device backend should be able to correctly handle + * the lifetime of the stream. */ - c10::Stream getStreamFromGlobalPool( - c10::Device d, - bool isHighPriority = false) const override { - return c10::Stream(c10::Stream::DEFAULT, d); + Stream getNewStream(Device d, int priority = 0) const override { + return getStreamFromPool(priority, d.index()); } /** - * Return a new stream for a given device and priority. The stream will be - * copied and shared around, device backend should be able to correctly handle - * the lifetime of the stream. + * Get a stream from the global pool for a given device. */ - c10::Stream getNewStream(c10::Device d, int priority = 0) const override { - return c10::Stream(c10::Stream::DEFAULT, d); + Stream getStreamFromGlobalPool(Device d, bool isHighPriority = false) + const override { + return getStreamFromPool(isHighPriority, d.index()); } /** @@ -98,14 +104,37 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { * to set the current device to match the device of this stream. */ c10::Stream exchangeStream(c10::Stream s) const noexcept override { - return s; + const OpenRegStream stream(s); + const auto old_stream = getCurrentOpenRegStream(s.device().index()); + setCurrentOpenRegStream(stream); + return old_stream.unwrap(); + } + + /** + * Get the number of devices. + * + * WARNING: This is REQUIRED to not raise an exception. + * If there is some sort of problem, e.g., driver error, + * you should report that there are zero available devices. + */ + DeviceIndex deviceCount() const noexcept override { + return device_count(); } /** * Destroys the given event. 
*/ void destroyEvent(void* event, const c10::DeviceIndex device_index) - const noexcept override {} + const noexcept override { + if (!event) + return; + + auto or_event = static_cast(event); + auto orig_device = current_device(); + set_device(device_index); + orEventDestroy(or_event); + set_device(orig_device); + } /** * Increments the event's version and enqueues a job with this version @@ -118,10 +147,40 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { const c10::Stream& stream, const c10::DeviceIndex device_index, const c10::EventFlag flag) const override { - static int event_id = 1; + TORCH_CHECK( + device_index == -1 || device_index == stream.device_index(), + "Event device index ", + device_index, + " does not match recording stream's device index ", + stream.device_index(), + "."); + + orEvent_t or_event = static_cast(*event); + OpenRegStream or_stream{stream}; - if (!*event) - *event = reinterpret_cast(event_id++); + const auto orig_device = current_device(); + set_device(stream.device().index()); + + if (!or_event) { + auto or_flag = orEventDisableTiming; + switch (flag) { + case EventFlag::PYTORCH_DEFAULT: + or_flag = orEventDisableTiming; + break; + case EventFlag::BACKEND_DEFAULT: + or_flag = orEventEnableTiming; + break; + default: + TORCH_CHECK(false, "Received unknown flag"); + } + + orEventCreateWithFlags(&or_event, or_flag); + } + + orEventRecord(or_event, or_stream); + *event = or_event; + + set_device(orig_device); } /** @@ -132,7 +191,17 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { * When the stream reaches this command it will stop processing * additional commands until that version of the event is marked as recorded. */ - void block(void* event, const c10::Stream& stream) const override {} + void block(void* event, const c10::Stream& stream) const override { + if (!event) + return; + + orEvent_t or_event = static_cast(event); + OpenRegStream or_stream{stream}; + const auto orig_device = current_device(); + set_device(stream.device().index()); + orStreamWaitEvent(or_stream, or_event, 0); + set_device(orig_device); + } /** * Returns true if (and only if) @@ -141,47 +210,56 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { * Returns false otherwise. */ bool queryEvent(void* event) const override { - return true; - } + if (!event) + return true; - /** - * Get the number of devices. WARNING: This is REQUIRED to not raise - * an exception. If there is some sort of problem, e.g., driver error, - * you should report that there are zero available devices. - */ - c10::DeviceIndex deviceCount() const noexcept override { - int device_index = -1; - orGetDeviceCount(&device_index); - return device_index; + orEvent_t or_event = static_cast(event); + const orError_t err = orEventQuery(or_event); + + return err == orSuccess ? true : false; } + /** * Return true if all the work previously enqueued on the stream for * asynchronous execution has completed running on the device. */ bool queryStream(const c10::Stream& stream) const override { - return true; + OpenRegStream or_stream{stream}; + return or_stream.query(); } /** * Wait (by blocking the calling thread) until all the work previously * enqueued on the stream has completed running on the device. 
*/ - void synchronizeStream(const c10::Stream& stream) const override {} + void synchronizeStream(const c10::Stream& stream) const override { + OpenRegStream or_stream{stream}; + or_stream.synchronize(); + } /** * Wait (by blocking the calling thread) until all the work previously * recorded on the event has completed running on the device. */ - void synchronizeEvent(void* event) const override {} + void synchronizeEvent(void* event) const override { + if (!event) + return; + + orEvent_t or_event = static_cast(event); + orEventSynchronize(or_event); + } /** - * Ensure the caching allocator (if any) is aware that the given DataPtr is - * being used on the given stream, and that it should thus avoid recycling the - * DataPtr until all work on that stream is done. + * Wait (by blocking the calling thread) until all the work has + * completed running on the device. */ - void recordDataPtrOnStream( - const c10::DataPtr& data_ptr, - const c10::Stream& stream) const override {} + void synchronizeDevice(const c10::DeviceIndex device_index) const override { + DeviceIndex orig_device{-1}; + auto orig_devicec = current_device(); + set_device(device_index); + orDeviceSynchronize(); + set_device(orig_device); + } /** * Fetch the elapsed time between two recorded events. @@ -190,7 +268,20 @@ struct OpenRegGuardImpl final : public c10::impl::DeviceGuardImplInterface { void* event1, void* event2, const c10::DeviceIndex device_index) const override { - return 1; + TORCH_CHECK( + event1 && event2, + "Both events must be recorded before calculating elapsed time."); + auto orig_device = current_device(); + set_device(device_index); + + orEvent_t or_event1 = static_cast(event1); + orEvent_t or_event2 = static_cast(event2); + float time_ms = 0; + orEventElapsedTime(&time_ms, or_event1, or_event2); + + set_device(orig_device); + + return static_cast(time_ms); } }; diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegHooks.h b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegHooks.h index 656fba8eae484..e6eb0c6f26083 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegHooks.h +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegHooks.h @@ -1,3 +1,5 @@ +#pragma once + #include #include diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegHostAllocator.h b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegHostAllocator.h index edef545a27835..264276661913b 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegHostAllocator.h +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegHostAllocator.h @@ -1,3 +1,5 @@ +#pragma once + #include #include diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegSerialization.h b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegSerialization.h index 559e92ea82f7b..759c425745c23 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegSerialization.h +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/csrc/runtime/OpenRegSerialization.h @@ -1,3 +1,5 @@ +#pragma once + #include #define REGISTER_PRIVATEUSE1_SERIALIZATION( \ diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_device.py 
b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_device.py new file mode 100644 index 0000000000000..cb1256b0d63c7 --- /dev/null +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_device.py @@ -0,0 +1,32 @@ +# Owner(s): ["module: PrivateUse1"] + +import torch +import torch_openreg # noqa: F401 +from torch.testing._internal.common_utils import run_tests, TestCase + + +class TestDevice(TestCase): + def test_device_count(self): + count = torch.accelerator.device_count() + self.assertEqual(count, 2) + + def test_device_switch(self): + torch.accelerator.set_device_index(1) + self.assertEqual(torch.accelerator.current_device_index(), 1) + + torch.accelerator.set_device_index(0) + self.assertEqual(torch.accelerator.current_device_index(), 0) + + def test_device_context(self): + device = torch.accelerator.current_device_index() + with torch.accelerator.device_index(None): + self.assertEqual(torch.accelerator.current_device_index(), device) + self.assertEqual(torch.accelerator.current_device_index(), device) + + with torch.accelerator.device_index(1): + self.assertEqual(torch.accelerator.current_device_index(), 1) + self.assertEqual(torch.accelerator.current_device_index(), device) + + +if __name__ == "__main__": + run_tests() diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_event.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_event.py index c381b623839c0..87b938ae43600 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_event.py +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_event.py @@ -6,34 +6,72 @@ class TestEvent(TestCase): @skipIfTorchDynamo() - def test_record_event(self): + def test_event_create(self): + event = torch.Event(device="openreg") + self.assertEqual(event.device.type, "openreg") + self.assertEqual(event.device.index, None) + self.assertEqual(event.event_id, 0) + + event = torch.Event(device="openreg:1") + self.assertEqual(event.device.type, "openreg") + self.assertEqual(event.device.index, None) + self.assertEqual(event.event_id, 0) + + event = torch.Event() + self.assertEqual(event.device.type, "openreg") + self.assertEqual(event.device.index, None) + self.assertEqual(event.event_id, 0) + + stream = torch.Stream(device="openreg:1") + event = stream.record_event() + self.assertEqual(event.device.type, "openreg") + self.assertEqual(event.device.index, 1) + self.assertNotEqual(event.event_id, 0) + + @skipIfTorchDynamo() + def test_event_query(self): + event = torch.Event() + self.assertTrue(event.query()) + + stream = torch.Stream(device="openreg:1") + event = stream.record_event() + event.synchronize() + self.assertTrue(event.query()) + + @skipIfTorchDynamo() + def test_event_record(self): stream = torch.Stream(device="openreg:1") event1 = stream.record_event() self.assertNotEqual(0, event1.event_id) + event2 = stream.record_event() self.assertNotEqual(0, event2.event_id) + self.assertNotEqual(event1.event_id, event2.event_id) @skipIfTorchDynamo() def test_event_elapsed_time(self): stream = torch.Stream(device="openreg:1") - e1 = torch.Event(device="openreg:1", enable_timing=True) - e1.record(stream) - e2 = torch.Event(device="openreg:1", enable_timing=True) - e2.record(stream) - e2.synchronize() - self.assertTrue(e2.query()) + event1 = torch.Event(device="openreg:1", enable_timing=True) + event1.record(stream) + event2 = torch.Event(device="openreg:1", enable_timing=True) + event2.record(stream) 
+ + stream.synchronize() + self.assertTrue(event1.query()) + self.assertTrue(event2.query()) - ms = e1.elapsed_time(e2) + ms = event1.elapsed_time(event2) self.assertTrue(ms > 0) @skipIfTorchDynamo() def test_event_wait_stream(self): - s1 = torch.Stream(device="openreg") - s2 = torch.Stream(device="openreg") - e1 = s1.record_event() - e1.wait(s2) + stream1 = torch.Stream(device="openreg") + stream2 = torch.Stream(device="openreg") + + event = stream1.record_event() + stream2.wait_event(event) if __name__ == "__main__": diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_streams.py b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_streams.py index 415f438bf0834..9220fefd6902c 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_streams.py +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/tests/test_streams.py @@ -5,11 +5,56 @@ class TestStream(TestCase): + def test_stream_create(self): + stream = torch.Stream(device="openreg") + self.assertEqual(stream.device_index, torch.openreg.current_device()) + + stream = torch.Stream(device="openreg:1") + self.assertEqual(stream.device.type, "openreg") + self.assertEqual(stream.device_index, 1) + + stream = torch.Stream(1) + self.assertEqual(stream.device.type, "openreg") + self.assertEqual(stream.device_index, 1) + + stream1 = torch.Stream( + stream_id=stream.stream_id, + device_type=stream.device_type, + device_index=stream.device_index, + ) + self.assertEqual(stream, stream1) + + def test_stream_context(self): + with torch.Stream(device="openreg:1") as stream: + self.assertEqual(torch.accelerator.current_stream(), stream) + + @skipIfTorchDynamo() + def test_stream_switch(self): + stream1 = torch.Stream(device="openreg:0") + torch.accelerator.set_stream(stream1) + current_stream = torch.accelerator.current_stream() + self.assertEqual(current_stream, stream1) + + stream2 = torch.Stream(device="openreg:1") + torch.accelerator.set_stream(stream2) + current_stream = torch.accelerator.current_stream() + self.assertEqual(current_stream, stream2) + def test_stream_synchronize(self): stream = torch.Stream(device="openreg:1") + self.assertEqual(True, stream.query()) + + event = torch.Event() + event.record(stream) stream.synchronize() self.assertEqual(True, stream.query()) + def test_stream_repr(self): + stream = torch.Stream(device="openreg:1") + self.assertTrue( + "torch.Stream device_type=openreg, device_index=1" in repr(stream) + ) + def test_stream_wait_stream(self): stream_1 = torch.Stream(device="openreg:0") stream_2 = torch.Stream(device="openreg:1") From a94ddd9b00a677a65fea31d38154062646e6ec13 Mon Sep 17 00:00:00 2001 From: FFFrog Date: Sat, 13 Sep 2025 02:06:26 +0800 Subject: [PATCH 200/693] [OpenReg] Fix the docs of Accelerator Intergration (#162826) ---- - Fixed the redirect link about step 1 - Formatted the autoload and added necessary links Pull Request resolved: https://github.com/pytorch/pytorch/pull/162826 Approved by: https://github.com/albanD ghstack dependencies: #161917, #161918, #160101 --- docs/source/accelerator/autoload.md | 14 ++++---------- docs/source/accelerator/operators.md | 2 +- .../torch_openreg/torch_openreg/__init__.py | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/source/accelerator/autoload.md b/docs/source/accelerator/autoload.md index 97664adcd735a..c4ebd83be6104 100644 --- a/docs/source/accelerator/autoload.md +++ b/docs/source/accelerator/autoload.md @@ -22,7 +22,7 @@ This tutorial 
will take **OpenReg** as a new out-of-the-tree device and guide yo ### Entry Point Setup -To enable **Autoload**, register the `_autoload` function as an entry point in `setup.py` file. +To enable **Autoload**, register the `_autoload` function as an entry point in [setup.py](https://github.com/pytorch/pytorch/blob/main/test/cpp_extensions/open_registration_extension/torch_openreg/setup.py) file. ::::{tab-set} @@ -43,19 +43,18 @@ To enable **Autoload**, register the `_autoload` function as an entry point in ` ### Backend Setup -Define the initialization hook `_autoload` for backend initialization. This hook will be automatically invoked by PyTorch during startup. +Define the initialization hook `_autoload` for backend initialization in [torch_openreg](https://github.com/pytorch/pytorch/blob/main/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py). This hook will be automatically invoked by PyTorch during startup. ::::{tab-set-code} + ```{eval-rst} .. literalinclude:: ../../../test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py :language: python :start-after: LITERALINCLUDE START: AUTOLOAD :end-before: LITERALINCLUDE END: AUTOLOAD :linenos: - :emphasize-lines: 10-12 ``` - :::: ## Result @@ -66,9 +65,6 @@ After setting up the entry point and backend, build and install your backend. No .. grid:: 2 .. grid-item-card:: :octicon:`terminal;1em;` Without Autoload - :class-card: card-prerequisites - - :: >>> import torch >>> import torch_openreg @@ -76,11 +72,9 @@ After setting up the entry point and backend, build and install your backend. No tensor(1, device='openreg:0') .. grid-item-card:: :octicon:`terminal;1em;` With Autoload - :class-card: card-prerequisites - - :: >>> import torch # Automatically import torch_openreg + >>> >>> torch.tensor(1, device="openreg") tensor(1, device='openreg:0') ``` diff --git a/docs/source/accelerator/operators.md b/docs/source/accelerator/operators.md index 2930d6b7f6e46..d5ae2aa5a2c68 100644 --- a/docs/source/accelerator/operators.md +++ b/docs/source/accelerator/operators.md @@ -169,7 +169,7 @@ Of course, global fallbacks can also be combined with a blacklist of fallbacks, ### PyTorch STUB -PyTorch also provides another approach for built-in operators: `STUB`. This method is essentially based on the `Step 1` approach, but adds secondary scheduling capabilities (for example, scheduling based on CPU characteristics). +PyTorch also provides another approach for built-in operators: `STUB`. This method is essentially based on the {ref}`Step 1` approach, but adds secondary scheduling capabilities (for example, scheduling based on CPU characteristics). ```{note} The `STUB` method currently supports only a limited set of operators. For new accelerator devices, the advantage of the `STUB` method is that it significantly reduces the cost of development at the cost of a small performance overhead. PyTorch currently does not clearly list the set of operators that can be registered through `STUB`. Due to the large number of related operators, only the query method for the supported operator list is provided here. 
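For the entry-point setup described in the autoload docs above, here is a minimal sketch of what such a `setup.py` stanza might look like. The `torch.backends` entry-point group name, the package name, and the module layout are assumptions for illustration; they are not copied from the linked `setup.py`.

```python
# Hedged sketch of an autoload entry point; names are illustrative only.
from setuptools import setup

setup(
    name="torch_openreg",
    packages=["torch_openreg"],
    entry_points={
        # PyTorch's autoload mechanism scans this entry-point group during
        # `import torch` and calls the referenced function, which in turn
        # imports the out-of-the-tree backend package.
        "torch.backends": [
            "torch_openreg = torch_openreg:_autoload",
        ],
    },
)
```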
diff --git a/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py b/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py index 18cee1615705d..874dbf79b2e8f 100644 --- a/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py +++ b/test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py @@ -9,7 +9,6 @@ _load_dll_libraries() del _load_dll_libraries -# LITERALINCLUDE START: AUTOLOAD import torch_openreg._C # type: ignore[misc] import torch_openreg.openreg @@ -19,6 +18,7 @@ torch.utils.generate_methods_for_privateuse1_backend(for_storage=True) +# LITERALINCLUDE START: AUTOLOAD def _autoload(): # It is a placeholder function here to be registered as an entry point. pass From ddc51076010ae8d26fdf3746db16031505ea688b Mon Sep 17 00:00:00 2001 From: Xuan Zhang Date: Fri, 12 Sep 2025 15:00:36 -0700 Subject: [PATCH 201/693] An improved heuristic for operator reordering for peak memory + debugging logs (#161810) Revisiting the idea in https://github.com/pytorch/pytorch/pull/140195 For the lpmf algorithm in the memory reorder pass, in some cases, when all the nodes that can be scheduled are quite large, it is beneficial to switch the scheduling strategy. So instead of using size as the criterion, we choose a node that can unlock more nodes to become schedulable by analyzing their successor nodes. For an internal use case, we observe up to 20 GiB memory difference and here are the before and after memory snapshot. More information can be found in [D81270682](https://www.internalfb.com/diff/D81270682) (internal only). image In addition, add the functionality to upload the graph to tlparse for offline debugging. The format of the json is in consistency with the simulator [here](https://fburl.com/code/3l3d3qi4) (internal only). Pull Request resolved: https://github.com/pytorch/pytorch/pull/161810 Approved by: https://github.com/yf225 --- torch/_inductor/config.py | 8 ++ torch/_inductor/memory.py | 165 ++++++++++++++++++++++++++++++++++---- torch/_inductor/utils.py | 12 +++ 3 files changed, 168 insertions(+), 17 deletions(-) diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index d20baa2aaf670..f143e34b5dbc6 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -391,6 +391,14 @@ def prologue_fusion_enabled() -> bool: # enable operator reordering for peak memory optimization reorder_for_peak_memory = True +reorder_for_peak_memory_debug = False + +# In some cases, when all the nodes that can be scheduled are quite large, +# it is beneficial to switch the scheduling strategy. So instead of using +# size as the criterion, we choose a node that can unlock more nodes to +# become schedulable by analyzing their successor nodes. The default value +# is zero, which turns off this optimization. +size_threshold_for_succ_based_strategy: int = 0 reorder_iterative_debug_memory_recompute: bool = False reorder_iterative_debug_limit_to_reorder: Optional[int] = ( diff --git a/torch/_inductor/memory.py b/torch/_inductor/memory.py index 27ca4415c8f0e..1a02dbb1e6af4 100644 --- a/torch/_inductor/memory.py +++ b/torch/_inductor/memory.py @@ -10,8 +10,9 @@ from torch._utils_internal import signpost_event from torch.utils._ordered_set import OrderedSet +from . 
import config from .ir import MultiOutputLayout, NoneLayout -from .utils import get_dtype_size +from .utils import get_dtype_size, is_nonfreeable_buffers from .virtualized import V @@ -92,14 +93,7 @@ def _dep_size_hint(dep: Dep) -> int: for node in nodes: for dep in node.read_writes.reads: if dep.name in graph_inputs: - dep_name = dep.name - # Subgraphs have a prefix for the name, cleanup the prefix - # before checking for known strings. - if V.graph.name: - dep_name = dep_name.removeprefix(V.graph.name + "_") - if not dep_name.startswith( - ("primals_", "arg", "fwd_rng_state", "bwd_rng_state") - ): + if not is_nonfreeable_buffers(dep): dep_name_to_succ_nodes[dep.name].add(node) dep_name_to_size[dep.name] = _dep_size_hint(dep) @@ -574,6 +568,7 @@ class BufferInfo(TypedDict): elif buf_name in name_to_freeable_input_buf: output_memory += name_to_freeable_input_buf[buf_name].mpi_buffer.size_free max_memory = max(live_memory, output_memory) + memory_gap = max_memory - live_memory # compute the amount of memory that is allocated when a node is scheduled # and the amount of memory that can be freed when a node is scheduled @@ -589,17 +584,33 @@ class BufferInfo(TypedDict): # schedule nodes one at a time schedule: list[BaseSchedulerNode] = [] + size_threshold = config.size_threshold_for_succ_based_strategy num_iters: int = 0 while num_iters < len(nodes) and nodes_to_schedule: # select a node to schedule: - selected_node = min( - nodes_to_schedule, - key=lambda node: ( - max(live_memory + node.mpi_node.size, max_memory), - node.mpi_node.size - node_info[node]["memory_to_free"], - node.mpi_node.index, - ), - ) + if ( + size_threshold > 0 + and min(node.mpi_node.size for node in nodes_to_schedule) > size_threshold + ): + selected_node = min( + nodes_to_schedule, + key=lambda node: min( + ( + succ_node.mpi_node.index + for succ_node in node.mpi_node.succ_nodes + ), + default=len(nodes), + ), + ) + else: + selected_node = min( + nodes_to_schedule, + key=lambda node: ( + node.mpi_node.size if node.mpi_node.size > memory_gap else 0, + node.mpi_node.size - node_info[node]["memory_to_free"], + node.mpi_node.index, + ), + ) nodes_to_schedule.remove(selected_node) schedule.append(selected_node) num_iters += 1 @@ -608,6 +619,7 @@ class BufferInfo(TypedDict): live_memory += selected_node.mpi_node.size max_memory = max(max_memory, live_memory) live_memory -= node_info[selected_node]["memory_to_free"] + memory_gap = max_memory - live_memory # update successor nodes and nodes_to_schedule for succ_node in selected_node.mpi_node.succ_nodes: @@ -887,6 +899,16 @@ def reorder_for_peak_memory( graph_outputs, ) + # export graph for simulator if needed + if config.reorder_for_peak_memory_debug: + export_graph_for_simulator( + nodes, + name_to_freeable_input_buf, + name_to_fused_node, + graph_inputs, + graph_outputs, + ) + # Validate planning info before proceeding with reordering try: validate_graph_acyclic(nodes) @@ -937,3 +959,112 @@ def reorder_for_peak_memory( best_result = min(peak_memory_diff_methods, key=lambda x: x.peak_memory) return best_result.order + + +def export_graph_for_simulator( + nodes: list[BaseSchedulerNode], + name_to_freeable_input_buf: dict[str, FreeableInputBuffer], + name_to_fused_node: dict[str, BaseSchedulerNode], + graph_inputs: OrderedSet[str], + graph_outputs: OrderedSet[str], +) -> None: + """ + This is for debugging purposes. It will dump a json file that records graph information. 
+ The graph can then be used in a simulator: https://fburl.com/code/3l3d3qi4 + """ + + class ORMBuffer(TypedDict): + name: str + size_alloc: int + size_free: int + size: int # for backward compatibility + is_input: bool + is_output: bool + deps: list[str] + unmet_deps: list[str] + + class ORMNode(TypedDict): + name: str + buffer_names: list[str] + + class ORMGraph(TypedDict): + nodes: list[ORMNode] + buffers: list[ORMBuffer] + + orm_buffers: list[ORMBuffer] = [] + orm_nodes: list[ORMNode] = [] + + # get orm buffers for freeable input buffers + for buf_name, input_buf in name_to_freeable_input_buf.items(): + orm_buf_input_buffer: ORMBuffer = { + "name": buf_name, + "size_alloc": input_buf.mpi_buffer.size_free, + "size_free": input_buf.mpi_buffer.size_free, + "size": input_buf.mpi_buffer.size_free, + "is_input": True, + "is_output": buf_name in graph_outputs, + "deps": [], + "unmet_deps": [], + } + orm_buffers.append(orm_buf_input_buffer) + + # get orm buffers for scheduler buffers + name_to_buf: dict[str, SchedulerBuffer] = { + buf.get_name(): buf for node in nodes for buf in node.get_outputs() + } # need to reassign due to probably node pruning + for buf_name, sched_buf in name_to_buf.items(): + if sched_buf.defining_op is None: + continue + deps = [ + pred_buf.get_name() + for pred_buf in name_to_fused_node[ + sched_buf.defining_op.get_name() + ].mpi_node.pred_buffers + ] + orm_buf_scheduler_buffer: ORMBuffer = { + "name": buf_name, + "size_alloc": sched_buf.mpi_buffer.size_alloc, + "size_free": sched_buf.mpi_buffer.size_free, + "size": sched_buf.mpi_buffer.size_free, + "is_input": False, + "is_output": buf_name in graph_outputs, + "deps": deps, + "unmet_deps": [ + buf_name for buf_name in deps if buf_name not in graph_inputs + ], + } + orm_buffers.append(orm_buf_scheduler_buffer) + + # get orm nodes + for node in nodes: + orm_node: ORMNode = { + "name": node.get_name(), + "buffer_names": list(node.get_buffer_names()), + } + orm_nodes.append(orm_node) + + # create the graph object + g: ORMGraph = { + "nodes": orm_nodes, + "buffers": orm_buffers, + } + + # dump the graph + import json + import os + + import torch + from functorch.compile import get_graph_being_compiled + + name = os.path.splitext(get_graph_being_compiled())[0] + "_fused" + + g_str = json.dumps(g, indent=2) + + torch._logging.trace_structured( + "artifact", + metadata_fn=lambda: { + "name": name, + "encoding": "string", + }, + payload_fn=lambda: g_str, + ) diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index b340f33b9d873..b0619e6592ff1 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -90,6 +90,7 @@ from .codegen.common import WorkspaceArg from .codegen.wrapper import PythonWrapperCodegen + from .dependencies import Dep from .graph import GraphLowering from .ir import Buffer, ExternKernel, IRNode, Layout, Operation, ReinterpretView from .output_code import CompiledFxGraph @@ -3695,3 +3696,14 @@ def to_real_tensor(e: Any) -> Any: flat_args = [to_real_tensor(a) for a in flat_args] args, kwargs = pytree.tree_unflatten(flat_args, flat_args_pytree_spec) return args, kwargs + + +def is_nonfreeable_buffers(dep: Dep) -> bool: + from .virtualized import V + + dep_name = dep.name + # Subgraphs have a prefix for the name, cleanup the prefix + # before checking for known strings. 
+ if V.graph.name: + dep_name = dep_name.removeprefix(V.graph.name + "_") + return dep_name.startswith(("primals_", "arg", "fwd_rng_state", "bwd_rng_state")) From 595e13feb71c10621271c30cb402906b6cc83e13 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Sat, 13 Sep 2025 00:45:46 +0000 Subject: [PATCH 202/693] [BE] [Inductor] Update NoValidChoicesError logic (#162814) Summary: Updates the NoValidChoicesError logic to include some additional context for if not choices exists or if no choices compiled. Test Plan: NFC. Depending on CI. Rollback Plan: Differential Revision: D82312035 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162814 Approved by: https://github.com/mlazos --- torch/_inductor/select_algorithm.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index 70dc4fc78a292..114b9a5a40e49 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -2426,19 +2426,20 @@ def __call__( N = input_nodes[-1].get_size()[-1] append_to_log(mm_file_name, {"invoke": str((M, K, N))}) - def create_no_valid_choices() -> NoValidChoicesError: + def create_no_valid_choices(reason: str) -> NoValidChoicesError: backend_config = ( "max_autotune_gemm_backends" if name != "convolution" else "max_autotune_conv_backends" ) return NoValidChoicesError( - f"No choices to select, please consider adding ATEN into {backend_config} " + f"No choices to select. Provided reason: {reason} " + f"please consider adding ATEN into {backend_config} " "config (defined in torch/_inductor/config.py) to allow at least one choice. " ) if len(choices) == 0: - raise create_no_valid_choices() + raise create_no_valid_choices("No choices exist for backend.") log.debug("Max autotune selects from %s choices.", str(len(choices))) if len(choices) == 1: @@ -2498,7 +2499,9 @@ def do_autotuning(choices, precompile_fn, hint_override: Optional[int] = None): # Prune anything that failed to compile choices = [c for c in choices if not c.failed] if len(choices) == 0: - raise create_no_valid_choices() + raise create_no_valid_choices( + "All choices failed to compile for backend." + ) candidates = self.prescreen_choices( choices, name, inputs_key, self.prescreening_cache From a749c403422f25bcff95ec141a46c885378831fd Mon Sep 17 00:00:00 2001 From: Parshant Sharma Date: Sat, 13 Sep 2025 01:17:06 +0000 Subject: [PATCH 203/693] [Bilinear] move check to reset_parameters (#160952) Fixes #160407 ### Summary: Moved the check to reset_parameters to make `Bilinear` module lazy. 
Lazy modules have in_features initialized to 0 and a pre forward hook that initializes these to the appropriate shape, then calls reset parameters, ### Impact: module: nn, linear.py ### Test: Screenshot From 2025-08-19 13-27-12 Pull Request resolved: https://github.com/pytorch/pytorch/pull/160952 Approved by: https://github.com/mikaylagawarecki --- torch/nn/modules/linear.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torch/nn/modules/linear.py b/torch/nn/modules/linear.py index a3c867d533d6b..2a2d130590ef9 100644 --- a/torch/nn/modules/linear.py +++ b/torch/nn/modules/linear.py @@ -214,8 +214,6 @@ def __init__( ) -> None: factory_kwargs = {"device": device, "dtype": dtype} super().__init__() - if in1_features <= 0: - raise ValueError(f"in1_features must be > 0, but got {in1_features}") self.in1_features = in1_features self.in2_features = in2_features self.out_features = out_features @@ -233,6 +231,10 @@ def reset_parameters(self) -> None: """ Resets parameters based on their initialization used in ``__init__``. """ + if self.in1_features <= 0: + raise ValueError( + f"in1_features must be > 0, but got (in1_features={self.in1_features})" + ) bound = 1 / math.sqrt(self.weight.size(1)) init.uniform_(self.weight, -bound, bound) if self.bias is not None: From b2553a6ec4bb25b9a02f6dd531806e0a3286fa33 Mon Sep 17 00:00:00 2001 From: Xu Han Date: Sat, 13 Sep 2025 01:41:14 +0000 Subject: [PATCH 204/693] [AOTI] raise PyTorchStreamWriter open failed error code on windows (#162799) When I debug AOTI UT: `TestAOTInductorPackage_cpu::test_add`. I found it didn't output the verbose error code, when PyTorchStreamWriter open failed. This PR add the verbose error code output for debug. Local test shows as below: image The error code is 32, we can check the Windows error code 32 at https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- ``` ERROR_SHARING_VIOLATION 32 (0x20) The process cannot access the file because it is being used by another process. ``` This issue is caused by the file is opened by another process. I fixed same issue in zip open as PR: https://github.com/pytorch/pytorch/pull/162617 But still no idea how to open file with shared access in `std::ofstream`. I will continue to researching it. 
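As a side note to the error-code lookup above, the raw code printed by the new error path can also be decoded locally. A small Windows-only sketch, illustrative and not part of this patch:

```python
# Windows-only: ctypes.FormatError wraps FormatMessage and turns a raw
# GetLastError code into its system message text.
import ctypes

print(ctypes.FormatError(32))
# -> The process cannot access the file because it is being used by another process.
```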
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162799 Approved by: https://github.com/jansel --- caffe2/serialize/inline_container.cc | 42 ++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/caffe2/serialize/inline_container.cc b/caffe2/serialize/inline_container.cc index e39a78c62dd54..015c480cf04f0 100644 --- a/caffe2/serialize/inline_container.cc +++ b/caffe2/serialize/inline_container.cc @@ -27,6 +27,10 @@ #include "caffe2/serialize/versions.h" #include "miniz.h" +#ifdef _WIN32 +#include +#endif // _WIN32 + namespace caffe2 { namespace serialize { constexpr std::string_view kDebugPklSuffix(".debug_pkl"); @@ -711,21 +715,35 @@ void PyTorchStreamWriter::setup(const string& file_name) { if (archive_name_.size() == 0) { CAFFE_THROW("invalid file name: ", file_name); } + + const std::string dir_name = parentdir(file_name); + if (!dir_name.empty()) { + struct stat st; + bool dir_exists = + (stat(dir_name.c_str(), &st) == 0 && (st.st_mode & S_IFDIR)); + TORCH_CHECK( + dir_exists, "Parent directory ", dir_name, " does not exist."); + } + TORCH_CHECK(file_stream_, "File ", file_name, " cannot be opened."); + if (!writer_func_) { - file_stream_.open( - file_name, - std::ofstream::out | std::ofstream::trunc | std::ofstream::binary); valid("opening archive ", file_name.c_str()); - - const std::string dir_name = parentdir(file_name); - if (!dir_name.empty()) { - struct stat st; - bool dir_exists = - (stat(dir_name.c_str(), &st) == 0 && (st.st_mode & S_IFDIR)); - TORCH_CHECK( - dir_exists, "Parent directory ", dir_name, " does not exist."); + try { + file_stream_.exceptions(std::ios_base::failbit | std::ios_base::badbit); + file_stream_.open( + file_name, + std::ofstream::out | std::ofstream::trunc | std::ofstream::binary + ); + } catch (const std::ios_base::failure& e) { +#ifdef _WIN32 + // Windows have verbose error code, we prefer to use it than std errno. + uint32_t error_code = GetLastError(); + CAFFE_THROW("open file failed with error code: ", error_code); +#else // !_WIN32 + CAFFE_THROW("open file failed with strerror: ", strerror(errno)); +#endif // _WIN32 } - TORCH_CHECK(file_stream_, "File ", file_name, " cannot be opened."); + writer_func_ = [this](const void* buf, size_t nbytes) -> size_t { if (!buf) { // See [Note: write_record_metadata] From 0925c644edafbb6a8ff42fef5f3bd48b6042fad3 Mon Sep 17 00:00:00 2001 From: Kevin Tang Date: Sat, 13 Sep 2025 01:50:40 +0000 Subject: [PATCH 205/693] [DCP] Decrease checkpoint background process Gloo pg init timeout (#162760) Summary: Sometimes checkpoint background process creation times out during gloo pg init. Attempting to destroy the process during that time can block the trainer thread until the timeout completes. This diff reduces the pg init timeout from 30m -> 10m to reduce the cleanup time. 
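For reference, the timeout being tightened here is the standard `init_process_group` argument. A minimal single-process sketch of the call shape (the rendezvous address, rank, and world size are illustrative):

```python
from datetime import timedelta

import torch.distributed as dist

# The timeout bounds how long Gloo pg init may block, which in turn bounds
# how long cleanup of a stuck background process can stall the caller.
dist.init_process_group(
    backend="gloo",
    init_method="tcp://127.0.0.1:29500",
    rank=0,
    world_size=1,
    timeout=timedelta(seconds=600),
)
dist.barrier()
dist.destroy_process_group()
```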
Test Plan: CI Rollback Plan: Differential Revision: D81724668 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162760 Approved by: https://github.com/meetv18 --- torch/distributed/checkpoint/_async_process_executor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/torch/distributed/checkpoint/_async_process_executor.py b/torch/distributed/checkpoint/_async_process_executor.py index e708433058440..0614f65837f7d 100644 --- a/torch/distributed/checkpoint/_async_process_executor.py +++ b/torch/distributed/checkpoint/_async_process_executor.py @@ -4,6 +4,7 @@ import os from concurrent.futures import Future, ThreadPoolExecutor from dataclasses import dataclass +from datetime import timedelta from enum import Enum from typing import Any, Optional, Union from uuid import uuid4 @@ -215,7 +216,9 @@ def _checkpointing_subprocess( "Initializing dist.ProcessGroup in checkpoint background process" ) # NOTE: GLOO backend is enforced here. - dist.init_process_group(backend=dist.Backend.GLOO) + dist.init_process_group( + backend=dist.Backend.GLOO, timeout=timedelta(seconds=600) + ) dist.barrier() logger.info("Checkpoint background process is running...") From a956c4ab1cb13079203a8f07eb26218724f54dc8 Mon Sep 17 00:00:00 2001 From: Sherlock Huang Date: Sat, 13 Sep 2025 01:50:49 +0000 Subject: [PATCH 206/693] Return NoOpDeviceGuardImpl in replace of CudaDeviceGuard when device is not available, or cpu-only build (#160532) Summary: To support exporting a cuda model on a CPU-only machine under fake tensor mode. User commonly need to move sample inputs to the cuda device with .to("cuda:0") or .to("cuda") call. This diff supports this. I expect the following pattern to work ``` with FakeTensorMode(allow_non_fake_inputs=True): cuda_module = module.to("cuda:0") cuda_sample_inputs = tuple([x.to("cuda:0") for x in sample_inputs]) with torch.no_grad(): ep = torch.export.export(cuda_module, cuda_sample_inputs) ``` Test Plan: CI Rollback Plan: Differential Revision: D80181887 Pull Request resolved: https://github.com/pytorch/pytorch/pull/160532 Approved by: https://github.com/henryoier, https://github.com/ezyang --- c10/core/impl/DeviceGuardImplInterface.cpp | 24 ++++++++++ c10/core/impl/DeviceGuardImplInterface.h | 5 +- test/export/test_export_opinfo.py | 56 ++++++++++++++++++---- torch/_C/__init__.pyi.in | 1 + torch/_subclasses/fake_tensor.py | 7 +++ torch/csrc/Module.cpp | 18 ++++++- 6 files changed, 100 insertions(+), 11 deletions(-) diff --git a/c10/core/impl/DeviceGuardImplInterface.cpp b/c10/core/impl/DeviceGuardImplInterface.cpp index 015bcd3e64fb3..1fb78aa443e3f 100644 --- a/c10/core/impl/DeviceGuardImplInterface.cpp +++ b/c10/core/impl/DeviceGuardImplInterface.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace c10::impl { @@ -14,4 +15,27 @@ DeviceGuardImplRegistrar::DeviceGuardImplRegistrar( device_guard_impl_registry[static_cast(type)].store(impl); } +namespace { +thread_local std::unique_ptr tls_fake_device_guard = + nullptr; +} + +void ensureCUDADeviceGuardSet() { + constexpr auto cuda_idx = static_cast(DeviceType::CUDA); + + const DeviceGuardImplInterface* p = + device_guard_impl_registry[cuda_idx].load(); + + // A non-null `ptr` indicates that CUDA is already available. + if (p == nullptr || (p && p->deviceCount() == 0)) { + // In following cases, we override CUDA guard interface with a no-op + // device guard. + // 1. p == nullptr; Trying to get a cuda device guard on a cpu-only build. + // 2. 
p->deviceCount() == 0; cuda build enabled, but no cuda devices + // available. + tls_fake_device_guard = std::make_unique>(); + device_guard_impl_registry[cuda_idx].store(tls_fake_device_guard.get()); + } +} + } // namespace c10::impl diff --git a/c10/core/impl/DeviceGuardImplInterface.h b/c10/core/impl/DeviceGuardImplInterface.h index 523e9ad9f45fa..fc8c367f75e8c 100644 --- a/c10/core/impl/DeviceGuardImplInterface.h +++ b/c10/core/impl/DeviceGuardImplInterface.h @@ -6,6 +6,7 @@ #include // Just for C10_ANONYMOUS_VARIABLE +#include #include #include @@ -251,7 +252,7 @@ struct C10_API DeviceGuardImplInterface { // for devices that don't actually have a concept of device index. Prominent // examples are CPU and Meta. template -struct NoOpDeviceGuardImpl final : public DeviceGuardImplInterface { +struct NoOpDeviceGuardImpl : public DeviceGuardImplInterface { NoOpDeviceGuardImpl() = default; DeviceType type() const override { return D; @@ -371,5 +372,7 @@ inline bool hasDeviceGuardImpl(DeviceType type) { return device_guard_impl_registry[static_cast(type)].load(); } +void C10_API ensureCUDADeviceGuardSet(); + } // namespace impl } // namespace c10 diff --git a/test/export/test_export_opinfo.py b/test/export/test_export_opinfo.py index 35d8b2895bd83..24e2f71ff4354 100644 --- a/test/export/test_export_opinfo.py +++ b/test/export/test_export_opinfo.py @@ -3,6 +3,7 @@ # flake8: noqa import itertools +import unittest import torch from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode @@ -50,17 +51,11 @@ xfail("masked.std"), xfail("masked.sum"), xfail("masked.var"), - xfail("nn.functional.grid_sample"), xfail("to_sparse"), # cannot xfail as it is passing for cpu-only build + skip("nn.functional.grid_sample"), skip("nn.functional.conv2d"), skip("nn.functional.scaled_dot_product_attention"), - # following are failing due to OptionalDeviceGuard - xfail("__getitem__"), - xfail("nn.functional.batch_norm"), - xfail("nn.functional.instance_norm"), - xfail("nn.functional.multi_margin_loss"), - xfail("nonzero"), } fake_decomposition_failures = { @@ -128,9 +123,52 @@ class TestExportOpInfo(TestCase): def test_fake_export(self, device, dtype, op): _test_export_helper(self, dtype, op) + @unittest.skipIf(not torch.backends.cuda.is_built(), "requires CUDA build") + def test_preserve_original_behavior(self): + def cuda_calls_behavior_unchanged(): + cpu_x = torch.randn(2) + with self.assertRaisesRegex( + RuntimeError, "Found no NVIDIA driver on your system." + ): + cuda_x = cpu_x.to("cuda") + + with self.assertRaisesRegex( + RuntimeError, "Found no NVIDIA driver on your system." + ): + torch.randn(2, device="cuda") + + with self.assertRaisesRegex( + RuntimeError, "Found no NVIDIA driver on your system." + ): + torch.cuda.get_device_capability() + + with self.assertRaisesRegex( + RuntimeError, "Found no NVIDIA driver on your system." + ): + torch.cuda.set_device(1) + + with self.assertRaisesRegex( + RuntimeError, "Found no NVIDIA driver on your system." 
+ ): + torch.cuda.current_device() + + self.assertEqual(torch.cuda.is_available(), False) + self.assertEqual(torch.cuda.device_count(), 0) + + cuda_calls_behavior_unchanged() + + cpu_x = torch.randn(2) + with FakeTensorMode(allow_non_fake_inputs=True) as mode: + cuda_x = mode.from_tensor(cpu_x) + cuda_x.fake_device = torch.device("cuda") + cuda_y = cuda_x + cuda_x + self.assertEqual(cuda_y.device.type, "cuda") + + # should fail again after exiting the fake mode, with the identical error message + cuda_calls_behavior_unchanged() + -only_for = "cpu" -instantiate_device_type_tests(TestExportOpInfo, globals(), only_for=only_for) +instantiate_device_type_tests(TestExportOpInfo, globals(), only_for="cpu") if __name__ == "__main__": diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in index e55137c3d2bfd..83cacaf69dec8 100644 --- a/torch/_C/__init__.pyi.in +++ b/torch/_C/__init__.pyi.in @@ -1379,6 +1379,7 @@ def _get_linalg_preferred_backend() -> _LinalgBackend: ... def _set_linalg_preferred_backend(arg: _LinalgBackend): ... def _get_fp32_precision_getter(backend: str, op: str) -> str: ... def _set_fp32_precision_setter(backend: str, op: str, value: str) -> str: ... +def _ensureCUDADeviceGuardSet() -> None: ... class _LinalgBackend: Default: _LinalgBackend diff --git a/torch/_subclasses/fake_tensor.py b/torch/_subclasses/fake_tensor.py index 5767f6a1d0c1e..6b55abcef00cd 100644 --- a/torch/_subclasses/fake_tensor.py +++ b/torch/_subclasses/fake_tensor.py @@ -1387,6 +1387,12 @@ def __enter__(self) -> Self: # See NOTE: [torch.tensor, lift_fresh, and device movement] prev_only_lift_cpu_tensors = torch._C._only_lift_cpu_tensors() torch._C._set_only_lift_cpu_tensors(True) + + # In the case of CPU-only build or cuda device unavailable, + # we patch the cuda device guard to use NoOpDeviceGuardImpl. + # This enables us to trace over cuda kernels under FakeTensorMode. 
+ torch._C._ensureCUDADeviceGuardSet() + maybe_prev_fake_mode = torch._C._unset_dispatch_mode(self._mode_key) if self is not maybe_prev_fake_mode: self.enter_stack.append( @@ -1397,6 +1403,7 @@ def __enter__(self) -> Self: # no-op (still need to re-set the fake mode though since we unset it) torch._C._set_dispatch_mode(self) self.enter_stack.append((False, None, prev_only_lift_cpu_tensors)) + return self def __exit__( diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index ac2b03d2651cc..d040e16ba5283 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1550,6 +1551,15 @@ static PyObject* THPModule_are_vmap_fallback_warnings_enabled( END_HANDLE_TH_ERRORS } +static PyObject* THCPModule_ensureCUDADeviceGuardSet( + PyObject* self, + PyObject* noargs) { + HANDLE_TH_ERRORS + c10::impl::ensureCUDADeviceGuardSet(); + Py_RETURN_NONE; + END_HANDLE_TH_ERRORS +} + static std::initializer_list TorchMethods = { {"_initExtension", THPModule_initExtension, METH_O, nullptr}, {"_autograd_init", THPAutograd_initExtension, METH_NOARGS, nullptr}, @@ -1845,7 +1855,13 @@ static std::initializer_list TorchMethods = { (PyCFunction)(void (*)())THPModule_has_torch_function_variadic, METH_FASTCALL, nullptr}, - {nullptr, nullptr, 0, nullptr}}; + {"_ensureCUDADeviceGuardSet", + THCPModule_ensureCUDADeviceGuardSet, + METH_NOARGS, + nullptr}, + {nullptr, nullptr, 0, nullptr} + +}; #ifdef USE_CUDA // NOLINTBEGIN(misc-use-internal-linkage) From 7dd5f7b1256169595c3fc7afc2598437189a7507 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Sat, 13 Sep 2025 02:04:36 +0000 Subject: [PATCH 207/693] Revert "python fastpath for DTensor detach(), confirm that aliasing DTensorSpec is ok (#160580)" This reverts commit 4b2d297eec425475a82934a52e0edd96805524a1. Reverted https://github.com/pytorch/pytorch/pull/160580 on behalf of https://github.com/bdhirsh due to this broke shampoo, yanking ([comment](https://github.com/pytorch/pytorch/pull/160580#issuecomment-3287372891)) --- torch/distributed/tensor/_api.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/torch/distributed/tensor/_api.py b/torch/distributed/tensor/_api.py index 38a46ed5f03c5..7eeafaa8eaf9d 100644 --- a/torch/distributed/tensor/_api.py +++ b/torch/distributed/tensor/_api.py @@ -346,12 +346,6 @@ def __coerce_same_metadata_as_tangent__(self, flatten_spec, expected_type=None): # pyre-fixme[3]: Return type must be annotated. # pyre-fixme[2]: Parameter must be annotated. 
def __torch_dispatch__(cls, func, types, args=(), kwargs=None): # type: ignore[override] - # These are all ops that can show up in AccumulateGrad, - # which is susceptible to DTensor overheads - if func is torch.ops.aten.detach.default: - return DTensor( - args[0]._local_tensor.detach(), args[0]._spec, requires_grad=False - ) return DTensor._op_dispatcher.dispatch( func, args, From 543d50db2bff4c1e19936bf652b6cbae9c0b8c7d Mon Sep 17 00:00:00 2001 From: Chen Date: Sat, 13 Sep 2025 03:24:26 +0000 Subject: [PATCH 208/693] Fix torch export with dict input nested in args (#162618) Investigated together with @pyemma and @taotaohuang001 ## Problem when calling exported module with dict nested in the args tuple, it will make following complaits ``` Traceback (most recent call last): File "/home/chzhu/infinitrain/test_torch_export.py", line 32, in print(exported_model({"a2": torch.randn(10), "a1": torch.randn(10)})) File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/fx/graph_module.py", line 848, in call_wrapped return self._wrapped_call(self, *args, **kwargs) File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/fx/graph_module.py", line 424, in __call__ raise e File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/fx/graph_module.py", line 411, in __call__ return super(self.cls, obj).__call__(*args, **kwargs) # type: ignore[misc] File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl return self._call_impl(*args, **kwargs) File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1879, in _call_impl return inner() File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1806, in inner args_kwargs_result = hook(self, args, kwargs) # type: ignore[misc] File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 929, in _fn return fn(*args, **kwargs) File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/export/_unlift.py", line 81, in _check_input_constraints_pre_hook flat_args_with_path = _check_inputs_match(args, kwargs, self._in_spec) File "/home/chzhu/infinitrain/build/infinitrain/environments/development-venv/lib/python3.10/site-packages/torch/export/_unlift.py", line 64, in _check_inputs_match raise ValueError( # noqa: B904 ValueError: Trying to flatten user inputs with exported input tree spec: TreeSpec(tuple, None, [TreeSpec(tuple, None, [TreeSpec(dict, ['a1', 'a2'], [*, *])]), TreeSpec(dict, [], [])]) but actually got inputs with tree spec of: TreeSpec(tuple, None, [TreeSpec(tuple, None, [TreeSpec(dict, ['a2', 'a1'], [*, *])]), TreeSpec(dict, [], [])]). Please check that the inputs have the same number and type of args and kwargs as the ones you used when tracing. 
``` ## How to reproduce the issue ```python import torch # create a nn.Module with data_batch as input and output as output class MyModel(torch.nn.Module): def __init__(self): super(MyModel, self).__init__() self.linear = torch.nn.Linear(10, 1) def forward(self, data_batch): h1 = self.linear(data_batch["a1"]) h2 = self.linear(data_batch["a2"]) return h1 + h2 # torch export this module model = MyModel() example_args_forward = ( { "a1": torch.randn(10), "a2": torch.randn(10), }, ) exported_model = torch.export.export(model, example_args_forward, strict=True) # save the exported model torch.export.save(exported_model, "exported_model.pt2") # load the exported model exported_model = torch.export.load("exported_model.pt2").module() # run the exported model print(exported_model({"a2": torch.randn(10), "a1": torch.randn(10)})) ``` ## Root Cause Input spec is encoded as [TreeSpec](https://github.com/pytorch/pytorch/blob/582d278983b28a91ac0cedd035183f2495bb6887/torch/utils/_pytree.py#L1059) in torch export. With (args, kwargs) at the top level. When we call the exported model, it has a pre-execution [hook](https://github.com/pytorch/pytorch/blob/582d278983b28a91ac0cedd035183f2495bb6887/torch/export/_unlift.py#L66) to check the input TreeSpec matches the received TreeSpec, where in Treespec, the dict key order is preserved. Something like TreeSpec(dict, ['a2', 'a1'], [*,*]) To workaround this, the input check reorders [kwargs](https://github.com/pytorch/pytorch/blob/582d278983b28a91ac0cedd035183f2495bb6887/torch/export/_unlift.py#L67), that is why kwargs can be out of order. But the dict nested in the args is not re-ordered, so any re-ordering of the keys will throw errors. ## Solution Update eq_spec to handle the dict case, where we only guarantee that key set is the same without ordering constraints. 
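To make the root cause concrete, a small sketch using the private `torch.utils._pytree` API (behavior as described above) shows that dict key order is baked into the spec, which is why the input-check pre-hook rejected reordered dicts nested in args:

```python
import torch.utils._pytree as pytree

# Same dict contents, different key insertion order.
_, spec_a = pytree.tree_flatten(({"a1": 1, "a2": 2},))
_, spec_b = pytree.tree_flatten(({"a2": 2, "a1": 1},))

# The dict node records its keys, in order, as context, so the two specs
# compare unequal even though the key sets are identical.
print(spec_a == spec_b)  # False
```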
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162618 Approved by: https://github.com/angelayi --- test/export/test_export.py | 31 +++++++++++++++++++++++++++++++ torch/export/_unlift.py | 6 +++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/test/export/test_export.py b/test/export/test_export.py index 2c466f162a893..3ec52775cf097 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -16605,6 +16605,37 @@ def forward(self, *args, **kwargs): wrapper = Wrapper(pyt_model, example_inputs) wrapper.forward() + def test_export_with_dict_input_nested_in_args(self): + """Test export with dictionary input nested in args.""" + + class MyModel(torch.nn.Module): + def __init__(self): + super(MyModel, self).__init__() + self.linear = torch.nn.Linear(10, 1) + + def forward(self, data_batch): + h1 = self.linear(data_batch["a1"]) + h2 = self.linear(data_batch["a2"]) + return h1 + h2 + + # Create model and example inputs + model = MyModel() + a1 = torch.randn(10) + a2 = torch.randn(10) + original_input = {"a1": a1, "a2": a2} + example_args_forward = (original_input,) + + # Export the model + exported_model = export(model, example_args_forward) + + # Run both models and compare results + reordered_input = {"a2": a2, "a1": a1} + original_output = exported_model.module()(reordered_input) + loaded_output = model(original_input) + + # Verify outputs are close (allowing for floating point differences) + torch.testing.assert_close(original_output, loaded_output) + def test_strict_export_with_shared_parameters(self): """Test that parameter names are preserved when there are shared parameters with the same name.""" diff --git a/torch/export/_unlift.py b/torch/export/_unlift.py index ae4c09b7c8c2f..59c5ade5824a6 100644 --- a/torch/export/_unlift.py +++ b/torch/export/_unlift.py @@ -51,7 +51,11 @@ def _match_normalized_structure(a, b): return True if _normalize_type(a.type) != _normalize_type(b.type): return False - if a.context != b.context: + if a.type is dict and b.type is dict: + # in the case of dict, the context is list of keys and we allow the keys to be in any order + if set(a.context) != set(b.context): + return False + elif a.context != b.context: return False if len(a.children_specs) != len(b.children_specs): return False From 66133b1ab7881b99405bddd3ec7773faa47457bf Mon Sep 17 00:00:00 2001 From: Huy Do Date: Sat, 13 Sep 2025 03:43:55 +0000 Subject: [PATCH 209/693] Build vLLM aarch64 nightly wheels (#162664) PyTorch has published its aarch64 nightly wheels for all CUDA version after https://github.com/pytorch/pytorch/pull/162364 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162664 Approved by: https://github.com/atalman --- .github/actionlint.yaml | 1 + .github/ci_configs/vllm/Dockerfile.tmp_vllm | 29 ++++++--------- .github/scripts/prepare_vllm_wheels.sh | 3 ++ .github/workflows/build-vllm-wheel.yml | 41 +++++++++++++++------ 4 files changed, 44 insertions(+), 30 deletions(-) diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml index 798dee312306d..d4a7df9d5805b 100644 --- a/.github/actionlint.yaml +++ b/.github/actionlint.yaml @@ -21,6 +21,7 @@ self-hosted-runner: - linux.arm64.2xlarge.ephemeral - linux.arm64.m7g.4xlarge - linux.arm64.m7g.4xlarge.ephemeral + - linux.arm64.r7g.12xlarge.memory - linux.4xlarge.nvidia.gpu - linux.8xlarge.nvidia.gpu - linux.16xlarge.nvidia.gpu diff --git a/.github/ci_configs/vllm/Dockerfile.tmp_vllm b/.github/ci_configs/vllm/Dockerfile.tmp_vllm index 2cee6ed2df19a..a1b68ad28210d 100644 --- 
a/.github/ci_configs/vllm/Dockerfile.tmp_vllm +++ b/.github/ci_configs/vllm/Dockerfile.tmp_vllm @@ -82,16 +82,10 @@ RUN if command -v apt-get >/dev/null; then \ apt-get update -y \ && apt-get install -y ccache software-properties-common git curl wget sudo vim; \ else \ - dnf install -y git curl wget sudo vim; \ + dnf install -y git curl wget sudo; \ fi \ && python3 --version && python3 -m pip --version -# Workaround for https://github.com/openai/triton/issues/2507 and -# https://github.com/pytorch/pytorch/issues/107960 -- hopefully -# this won't be needed for future versions of this docker image -# or future versions of triton. -RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ - # Install uv for faster pip installs if not existed RUN --mount=type=cache,target=/root/.cache/uv \ if ! python3 -m uv --version >/dev/null 2>&1; then \ @@ -220,11 +214,16 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0 RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,source=.git,target=.git \ if [ "$USE_SCCACHE" = "1" ]; then \ - echo "Installing sccache..." \ - && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \ + echo "Installing sccache..."; \ + if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + SCCACHE_ARCHIVE="sccache-v0.8.1-aarch64-unknown-linux-musl"; \ + else \ + SCCACHE_ARCHIVE="sccache-v0.8.1-x86_64-unknown-linux-musl"; \ + fi; \ + curl -L -o sccache.tar.gz "https://github.com/mozilla/sccache/releases/download/v0.8.1/${SCCACHE_ARCHIVE}.tar.gz" \ && tar -xzf sccache.tar.gz \ - && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \ - && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \ + && sudo mv "${SCCACHE_ARCHIVE}"/sccache /usr/bin/sccache \ + && rm -rf sccache.tar.gz "${SCCACHE_ARCHIVE}" \ && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \ && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \ && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \ @@ -285,7 +284,7 @@ RUN if command -v apt-get >/dev/null; then \ && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION}; \ else \ - dnf install -y git curl wget sudo vim; \ + dnf install -y git curl wget sudo; \ fi \ && python3 --version && python3 -m pip --version @@ -298,12 +297,6 @@ RUN echo "[INFO] Listing current directory before torch install step:" && \ echo "[INFO] Showing torch_build_versions.txt content:" && \ cat torch_build_versions.txt -# Workaround for https://github.com/openai/triton/issues/2507 and -# https://github.com/pytorch/pytorch/issues/107960 -- hopefully -# this won't be needed for future versions of this docker image -# or future versions of triton. -RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ - # Install uv for faster pip installs if not existed RUN --mount=type=cache,target=/root/.cache/uv \ if ! 
python3 -m uv --version > /dev/null 2>&1; then \ diff --git a/.github/scripts/prepare_vllm_wheels.sh b/.github/scripts/prepare_vllm_wheels.sh index a1cd387ef4a6f..62362c7ff207c 100755 --- a/.github/scripts/prepare_vllm_wheels.sh +++ b/.github/scripts/prepare_vllm_wheels.sh @@ -84,6 +84,9 @@ repackage_wheel() { rm -rf $package } +# Require to re-package the wheel +${PYTHON_EXECUTABLE} -mpip install wheel==0.45.1 + pushd externals/vllm/wheels for package in xformers flashinfer-python vllm; do repackage_wheel $package diff --git a/.github/workflows/build-vllm-wheel.yml b/.github/workflows/build-vllm-wheel.yml index 1c3b1cce46038..bc22ce8d32a0a 100644 --- a/.github/workflows/build-vllm-wheel.yml +++ b/.github/workflows/build-vllm-wheel.yml @@ -12,6 +12,9 @@ on: paths: - .github/workflows/build-vllm-wheel.yml - .github/ci_commit_pins/vllm.txt + schedule: + # every morning at 01:30PM UTC, 9:30AM EST, 6:30AM PST + - cron: 30 13 * * * concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} @@ -24,21 +27,33 @@ jobs: fail-fast: false matrix: python-version: [ '3.12' ] - # TODO (huydhn): Add cu130 https://github.com/pytorch/pytorch/pull/162000#issuecomment-3261541554 + # TODO (huydhn): Add cu130 after https://github.com/vllm-project/vllm/issues/24464 is resolved + platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ] device: [ 'cu128', 'cu129' ] - runner: [ 'linux.12xlarge.memory' ] include: - - device: cu128 + - platform: manylinux_2_28_x86_64 + device: cu128 manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.8' - - device: cu129 + runner: linux.12xlarge.memory + - platform: manylinux_2_28_x86_64 + device: cu129 manylinux-image: 'pytorch/manylinux2_28-builder:cuda12.9' - name: "Build ${{ matrix.device }} vLLM wheel" + runner: linux.12xlarge.memory + - platform: manylinux_2_28_aarch64 + device: cu128 + manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.8' + runner: linux.arm64.r7g.12xlarge.memory + - platform: manylinux_2_28_aarch64 + device: cu129 + manylinux-image: 'pytorch/manylinuxaarch64-builder:cuda12.9' + runner: linux.arm64.r7g.12xlarge.memory + name: "Build ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}" runs-on: ${{ matrix.runner }} timeout-minutes: 480 env: PY_VERS: ${{ matrix.python-version }} MANYLINUX_IMAGE: ${{ matrix.manylinux-image }} - PLATFORM: 'manylinux_2_28_x86_64' + PLATFORM: ${{ matrix.platform }} BUILD_DEVICE: ${{ matrix.device }} steps: - name: Setup SSH (Click me for login details) @@ -136,7 +151,7 @@ jobs: - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: - name: vllm-wheel-${{ matrix.device }}-${{ matrix.python-version }}-${{ env.PLATFORM }} + name: vllm-wheel-${{ matrix.device }}-${{ matrix.platform }}-${{ matrix.python-version }} if-no-files-found: error path: ${{ runner.temp }}/artifacts/externals/vllm/wheels/*.whl @@ -146,15 +161,17 @@ jobs: # Copied from build-triton-wheel workflow (mostly) upload-wheel: - name: "Upload ${{ matrix.device }} vLLM wheel" + name: "Upload ${{ matrix.device }} vLLM wheel on ${{ matrix.platform }}" needs: - build-wheel runs-on: ubuntu-latest strategy: fail-fast: false matrix: + platform: [ 'manylinux_2_28_x86_64', 'manylinux_2_28_aarch64' ] device: [ 'cu128', 'cu129' ] env: + PLATFORM: ${{ matrix.platform }} BUILD_DEVICE: ${{ matrix.device }} permissions: id-token: write @@ -190,15 +207,15 @@ jobs: run: | set -eux mkdir -p "${RUNNER_TEMP}/artifacts/" - mv 
"${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-*/* "${RUNNER_TEMP}/artifacts/" + mv "${RUNNER_TEMP}"/artifacts-all/vllm-wheel-"${BUILD_DEVICE}"-"${PLATFORM}"-*/* "${RUNNER_TEMP}/artifacts/" - - name: Set DRY_RUN (only for tagged pushes) - if: ${{ github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) }} + - name: Set DRY_RUN + if: ${{ (github.event_name == 'push' && (github.event.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v'))) || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} shell: bash run: | echo "DRY_RUN=disabled" >> "$GITHUB_ENV" - - name: Set UPLOAD_CHANNEL (only for tagged pushes) + - name: Set UPLOAD_CHANNEL if: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') }} shell: bash run: | From 49d30f9a234f0816a1ece278c8450d119e417714 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Sat, 13 Sep 2025 03:57:35 +0000 Subject: [PATCH 210/693] Fix boxcox to return same result for same input in one batch (#162772) Summary: The SIMD path is using SLEEF version of `pow` which is slightly different from `std::pow`. The fix is to use the same vectorized code (with partial load and store) for the trailing data as well to ensure consistency between results. Rollback Plan: Differential Revision: D82265247 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162772 Approved by: https://github.com/swolchok --- caffe2/perfkernels/batch_box_cox_vec.h | 34 +++++++++++++++++--------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/caffe2/perfkernels/batch_box_cox_vec.h b/caffe2/perfkernels/batch_box_cox_vec.h index ed2e83062d107..08e4f84fe4327 100644 --- a/caffe2/perfkernels/batch_box_cox_vec.h +++ b/caffe2/perfkernels/batch_box_cox_vec.h @@ -73,6 +73,19 @@ void box_cox_zero_lambda( } } +template +at::vec::Vectorized box_cox_nonzero_lambda_impl( + at::vec::Vectorized data, + at::vec::Vectorized lambda1, + at::vec::Vectorized lambda2, + at::vec::Vectorized k_eps) { + auto sum = data + lambda2; + auto max = at::vec::max(sum, k_eps); + auto lambda_over_1 = at::vec::fast_recieprocal(lambda1); + auto pow = max.pow(lambda1); + return at::vec::fmsub(pow, lambda_over_1, lambda_over_1); +} + template void box_cox_nonzero_lambda( int64_t D, @@ -88,21 +101,18 @@ void box_cox_nonzero_lambda( auto k_eps_vec = Vec(k_eps); for(; j + VLEN < D; j += VLEN) { auto data = Vec::loadu(data_ptr + j); - auto lambda2 = Vec::loadu(lambda2_ptr + j); - auto sum = data + lambda2; - auto max = at::vec::max(sum, k_eps_vec); auto lambda1 = Vec::loadu(lambda1_ptr + j); - auto lambda_over_1 = at::vec::fast_recieprocal(lambda1); - auto pow = max.pow(lambda1); - auto res = at::vec::fmsub(pow, lambda_over_1, lambda_over_1); + auto lambda2 = Vec::loadu(lambda2_ptr + j); + auto res = box_cox_nonzero_lambda_impl(data, lambda1, lambda2, k_eps_vec); res.store(out + j); } - for ( ;j < D; ++j) { - auto sum = data_ptr[j] + lambda2_ptr[j]; - auto max = std::max(sum, k_eps); - auto lambda_over_1 = at::vec::fast_recieprocal(lambda1_ptr[j]); - auto pow = std::pow(max, lambda1_ptr[j]); - out[j] = pow * lambda_over_1 - lambda_over_1; + if (j < D) { + auto remaining = D - j; + auto data = Vec::loadu(data_ptr + j, remaining); + auto lambda1 = Vec::loadu(lambda1_ptr + j, remaining); + auto lambda2 = Vec::loadu(lambda2_ptr + j, remaining); + auto res = box_cox_nonzero_lambda_impl(data, lambda1, lambda2, k_eps_vec); + res.store(out + j, remaining); } } #else From 
783985e9fef021fa362ca6cd5705d1fd8c0a94a9 Mon Sep 17 00:00:00 2001 From: Georgia Phillips Date: Sat, 13 Sep 2025 03:57:40 +0000 Subject: [PATCH 211/693] kjt pytree registration (#161114) Differential Revision: D80656182 Pull Request resolved: https://github.com/pytorch/pytorch/pull/161114 Approved by: https://github.com/henryoier --- test/cpp/nativert/test_itree.cpp | 197 +++++++++++++++++++++++++++++++ torch/nativert/detail/ITree.cpp | 142 ++++++++++++++++++++++ 2 files changed, 339 insertions(+) diff --git a/test/cpp/nativert/test_itree.cpp b/test/cpp/nativert/test_itree.cpp index 4748c11c3e17a..15ff600fe73d5 100644 --- a/test/cpp/nativert/test_itree.cpp +++ b/test/cpp/nativert/test_itree.cpp @@ -4,6 +4,7 @@ #include #include +#include #include namespace torch::nativert::detail { @@ -1147,4 +1148,200 @@ TEST(ITreeTest, ToAtenType) { c10::TypeKind::AnyType); } +TEST(ITreeTest, KeyedJaggedTensorUnflatten) { + // Test KeyedJaggedTensor pytree node registration + // KeyedJaggedTensor has 6 tensor fields: _values, _weights, _lengths, + // _offsets, _stride_per_key_per_rank, _inverse_indices + auto jsonSpec = R"( +[ + 1, + { + "type": "torchrec.sparse.jagged_tensor.KeyedJaggedTensor", + "context": "[\"key1\", \"key2\"]", + "children_spec": [ + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + } + ] + } +] + )"; + + auto [graph, valuePtrs] = makeValues(6); + const auto spec = itreeSpecLoads(jsonSpec, valuePtrs); + + // Create mock tensor values for the 6 fields + std::vector flats = { + c10::IValue(1), // _values + c10::IValue(2), // _weights + c10::IValue(3), // _lengths + c10::IValue(4), // _offsets + c10::IValue(5), // _stride_per_key_per_rank + c10::IValue(6), // _inverse_indices tensor part + }; + + // Test unflatten - this will create a generic tuple since we don't have + // the actual KeyedJaggedTensor constructor available in tests + auto itree = itreeUnflatten(flats, spec); + EXPECT_TRUE(itree.isTuple()); + EXPECT_EQ(itree.toTupleRef().elements().size(), 6); + + // Verify the values match what we put in + for (size_t i = 0; i < 6; i++) { + EXPECT_EQ(itree.toTupleRef().elements()[i], flats[i]); + } + + // Verify spec has correct number of children and structure + EXPECT_EQ(spec.children().size(), 6); + EXPECT_EQ(spec.numIValues(), 6); + EXPECT_FALSE(spec.isIValue()); + EXPECT_EQ( + spec.uniformName(), "torchrec.sparse.jagged_tensor.KeyedJaggedTensor"); +} + +TEST(ITreeTest, KeyedJaggedTensorNodeRegistration) { + // Test that KeyedJaggedTensor pytree node is properly registered + + // Verify the KeyedJaggedTensor node is in the registry by attempting + // to load a spec that references it + auto jsonSpec = R"( +[ + 1, + { + "type": "torchrec.sparse.jagged_tensor.KeyedJaggedTensor", + "context": "[\"key1\", \"key2\"]", + "children_spec": [ + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + } + ] + } +] + )"; + + auto 
[graph, valuePtrs] = makeValues(6); + + // This should not throw - if KeyedJaggedTensor wasn't registered, + // we'd get an exception about "Unknown pytree node type" + EXPECT_NO_THROW({ + const auto spec = itreeSpecLoads(jsonSpec, valuePtrs); + + // Verify the spec loaded correctly + EXPECT_FALSE(spec.isIValue()); + EXPECT_EQ( + spec.uniformName(), "torchrec.sparse.jagged_tensor.KeyedJaggedTensor"); + EXPECT_EQ(spec.children().size(), 6); + EXPECT_EQ(spec.numIValues(), 6); + + // Verify context is parsed correctly + EXPECT_FALSE(spec.context().is_null()); + EXPECT_TRUE(spec.context().is_array()); + EXPECT_EQ(spec.context().size(), 2); + }); +} + +TEST(ITreeTest, JaggedTensorNodeRegistration) { + // Test that JaggedTensor pytree node is also properly registered + + auto jsonSpec = R"( +[ + 1, + { + "type": "torchrec.sparse.jagged_tensor.JaggedTensor", + "context": "null", + "children_spec": [ + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + }, + { + "type": null, + "context": null, + "children_spec": [] + } + ] + } +] + )"; + + auto [graph, valuePtrs] = makeValues(4); + + // This should not throw - if JaggedTensor wasn't registered, + // we'd get an exception about "Unknown pytree node type" + EXPECT_NO_THROW({ + const auto spec = itreeSpecLoads(jsonSpec, valuePtrs); + + // Verify the spec loaded correctly + EXPECT_FALSE(spec.isIValue()); + EXPECT_EQ(spec.uniformName(), "torchrec.sparse.jagged_tensor.JaggedTensor"); + EXPECT_EQ(spec.children().size(), 4); + EXPECT_EQ(spec.numIValues(), 4); + }); +} + } // namespace torch::nativert::detail diff --git a/torch/nativert/detail/ITree.cpp b/torch/nativert/detail/ITree.cpp index cd24ca78320fb..b24ee65f162b6 100644 --- a/torch/nativert/detail/ITree.cpp +++ b/torch/nativert/detail/ITree.cpp @@ -172,6 +172,148 @@ class PytreeNodeRegistry { registerNode( "torch.fx.immutable_collections.immutable_dict", getNodeDef("builtins.dict")); + // Register JaggedTensor pytree node + registerNode( + "torchrec.sparse.jagged_tensor.JaggedTensor", + NodeDef{ + [](const c10::IValue& nested, + const ITreeSpec& spec, + std::vector& ivalues) { + // JaggedTensor has 4 fields: _values, _weights, _lengths, + // _offsets All fields are optional torch.Tensor except _values + TORCH_CHECK(nested.isObject(), "Expected JaggedTensor object"); + const auto& obj = nested.toObjectRef(); + + // Extract the tensor fields in order: _values, _weights, + // _lengths, _offsets + TORCH_CHECK( + spec.children().size() == 4, + "JaggedTensor should have 4 children"); + + // Flatten each tensor field + itreeFlatten(obj.getAttr("_values"), spec.children(0), ivalues); + itreeFlatten(obj.getAttr("_weights"), spec.children(1), ivalues); + itreeFlatten(obj.getAttr("_lengths"), spec.children(2), ivalues); + itreeFlatten(obj.getAttr("_offsets"), spec.children(3), ivalues); + }, + [](std::vector flats, + const nlohmann::json& obj) -> c10::IValue { + // Reconstruct JaggedTensor from flattened tensors + // This is a simplified reconstruction - in practice would need + // to call the actual JaggedTensor constructor + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(obj.is_null()); + TORCH_CHECK( + flats.size() == 4, "JaggedTensor expects 4 tensor fields"); + + // Return a generic tuple for now - actual implementation would + // need to construct the JaggedTensor custom class + return c10::ivalue::Tuple::create(std::move(flats)); + }, + [](ITreeMapNoReturnFn fn, + const 
c10::IValue& nested, + const ITreeSpec& spec) { + TORCH_CHECK(nested.isObject(), "Expected JaggedTensor object"); + const auto& obj = nested.toObjectRef(); + + TORCH_CHECK( + spec.children().size() == 4, + "JaggedTensor should have 4 children"); + + // Apply function to each tensor field + ivalueApply(fn, obj.getAttr("_values"), spec.children(0)); + ivalueApply(fn, obj.getAttr("_weights"), spec.children(1)); + ivalueApply(fn, obj.getAttr("_lengths"), spec.children(2)); + ivalueApply(fn, obj.getAttr("_offsets"), spec.children(3)); + }}); + + // Register KeyedJaggedTensor pytree node + registerNode( + "torchrec.sparse.jagged_tensor.KeyedJaggedTensor", + NodeDef{ + [](const c10::IValue& nested, + const ITreeSpec& spec, + std::vector& ivalues) { + // KeyedJaggedTensor has 6 tensor fields plus keys context + // Fields: _values, _weights, _lengths, _offsets, + // _stride_per_key_per_rank, _inverse_indices tensor + TORCH_CHECK( + nested.isObject(), "Expected KeyedJaggedTensor object"); + const auto& obj = nested.toObjectRef(); + + // Extract the tensor fields in order + TORCH_CHECK( + spec.children().size() == 6, + "KeyedJaggedTensor should have 6 children"); + + // Flatten each tensor field + itreeFlatten(obj.getAttr("_values"), spec.children(0), ivalues); + itreeFlatten(obj.getAttr("_weights"), spec.children(1), ivalues); + itreeFlatten(obj.getAttr("_lengths"), spec.children(2), ivalues); + itreeFlatten(obj.getAttr("_offsets"), spec.children(3), ivalues); + itreeFlatten( + obj.getAttr("_stride_per_key_per_rank"), + spec.children(4), + ivalues); + // For _inverse_indices, we need to extract the tensor part + // (second element of tuple) + auto inverse_indices = obj.getAttr("_inverse_indices"); + if (!inverse_indices.isNone()) { + auto tuple = inverse_indices.toTuple(); + itreeFlatten(tuple->elements()[1], spec.children(5), ivalues); + } else { + // Handle None case by adding a null tensor + itreeFlatten(c10::IValue(), spec.children(5), ivalues); + } + }, + [](std::vector flats, + const nlohmann::json& obj) -> c10::IValue { + // Reconstruct KeyedJaggedTensor from flattened tensors and keys + // context + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!obj.is_null()); + TORCH_CHECK( + flats.size() == 6, + "KeyedJaggedTensor expects 6 tensor fields"); + + // The context should contain the keys list + // Return a generic tuple for now - actual implementation would + // need to construct the KeyedJaggedTensor custom class + return c10::ivalue::Tuple::create(std::move(flats)); + }, + [](ITreeMapNoReturnFn fn, + const c10::IValue& nested, + const ITreeSpec& spec) { + TORCH_CHECK( + nested.isObject(), "Expected KeyedJaggedTensor object"); + const auto& obj = nested.toObjectRef(); + + TORCH_CHECK( + spec.children().size() == 6, + "KeyedJaggedTensor should have 6 children"); + + // Apply function to each tensor field + ivalueApply(fn, obj.getAttr("_values"), spec.children(0)); + ivalueApply(fn, obj.getAttr("_weights"), spec.children(1)); + ivalueApply(fn, obj.getAttr("_lengths"), spec.children(2)); + ivalueApply(fn, obj.getAttr("_offsets"), spec.children(3)); + ivalueApply( + fn, + obj.getAttr("_stride_per_key_per_rank"), + spec.children(4)); + // For _inverse_indices, we need to apply to the tensor part + // (second element of tuple) + auto inverse_indices = obj.getAttr("_inverse_indices"); + if (!inverse_indices.isNone()) { + auto tuple = inverse_indices.toTuple(); + ivalueApply(fn, tuple->elements()[1], spec.children(5)); + } else { + // Handle None case + ivalueApply(fn, c10::IValue(), spec.children(5)); + } + }, + 
[](std::string_view context) { + // Context contains the keys list as JSON + return nlohmann::json::parse(context); + }}); } bool hasNodeDef(std::string_view typeName) const { return registry_.find(std::string{typeName}) != registry_.end(); From da669d51bf163611bed720e0fe8dd8f4386ef46c Mon Sep 17 00:00:00 2001 From: Xuan Zhang Date: Fri, 12 Sep 2025 14:19:24 -0700 Subject: [PATCH 212/693] fusion of large accumulated reads only at ir level (#161978) This is to revert some of the changes in https://github.com/pytorch/pytorch/pull/158667 In particular, we only disallow fusion of large accumulate read at IR level and not at scheduler level, as users can create their own custom fusion logics for the scheduler level. Pull Request resolved: https://github.com/pytorch/pytorch/pull/161978 Approved by: https://github.com/yf225 --- test/inductor/test_memory.py | 27 +++++++++++++++++++++++++++ torch/_inductor/choices.py | 11 ----------- torch/_inductor/ir.py | 9 ++++++--- torch/_inductor/scheduler.py | 8 -------- 4 files changed, 33 insertions(+), 22 deletions(-) diff --git a/test/inductor/test_memory.py b/test/inductor/test_memory.py index 80372bca9fdca..f905990478f7d 100644 --- a/test/inductor/test_memory.py +++ b/test/inductor/test_memory.py @@ -353,6 +353,33 @@ def f(x, y, z): y = torch.rand(N, N, dtype=torch.float32, device=GPU_TYPE) z = torch.rand(N, N, dtype=torch.float32, device=GPU_TYPE) + from torch._inductor.choices import InductorChoices + from torch._inductor.scheduler import BaseSchedulerNode, Scheduler + + class CustomInductorChoices(InductorChoices): + @staticmethod + def can_fuse( + scheduler: Scheduler, + node1: BaseSchedulerNode, + node2: BaseSchedulerNode, + shared_data_score: int, + ) -> bool: + can_fuse_default = InductorChoices.can_fuse( + scheduler, node1, node2, shared_data_score + ) + if (not can_fuse_default) or ( + not config.realize_acc_reads_size_threshold + ): + return can_fuse_default + + all_reads = (node1.read_writes.reads | node2.read_writes.reads) - ( + node1.read_writes.writes | node2.read_writes.writes + ) + size_of_reads = [scheduler.dep_size_hint(dep) for dep in all_reads] + return sum(size_of_reads) < config.realize_acc_reads_size_threshold + + torch._inductor.virtualized.V.set_choices_handler(CustomInductorChoices()) + # CASE 1: no restriction on the amount of accumulation with config.patch({"realize_acc_reads_size_threshold": float("inf")}): f_compiled = torch.compile(f) diff --git a/torch/_inductor/choices.py b/torch/_inductor/choices.py index f35b5e254d013..ed272cc9cb362 100644 --- a/torch/_inductor/choices.py +++ b/torch/_inductor/choices.py @@ -496,17 +496,6 @@ def can_fuse( WhyNoFuse(node1, node2)("Fusion will increase peak memory") return False - if ( - config.realize_acc_reads_size_threshold is not None - and scheduler.fusion_accumulate_large_reads( - node1, - node2, - config.realize_acc_reads_size_threshold, - ) - ): - WhyNoFuse(node1, node2)("Fusion accumulate large amount of reads") - return False - return True @staticmethod diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index 427ca54544a31..9aece7015b970 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -8207,9 +8207,12 @@ def realize_hint(self) -> None: self.realize() def has_accumulated_enough_reads_by_size(self, threshold: int) -> bool: - return ( - sum(V.graph.get_dep_size_hint(dep) for dep in self.get_reads()) > threshold - ) + size_of_reads = [V.graph.get_dep_size_hint(dep) for dep in self.get_reads()] + if not size_of_reads: + return False + total_size = 
sum(size_of_reads) + max_size = max(size_of_reads) + return total_size > threshold and total_size / max_size >= 2 def has_exceeded_max_reads(self) -> bool: return isinstance(self.data, Pointwise) and ( diff --git a/torch/_inductor/scheduler.py b/torch/_inductor/scheduler.py index 7badacee1a7dd..ab29b0d0cf2c1 100644 --- a/torch/_inductor/scheduler.py +++ b/torch/_inductor/scheduler.py @@ -3807,14 +3807,6 @@ def _find_single_user_inputs( return True return False - def fusion_accumulate_large_reads( - self, node1: BaseSchedulerNode, node2: BaseSchedulerNode, threshold: int - ) -> bool: - all_reads = (node1.read_writes.reads | node2.read_writes.reads) - ( - node1.read_writes.writes | node2.read_writes.writes - ) - return sum(self.dep_size_hint(dep) for dep in all_reads) > threshold - def are_long_distant_nodes( self, node1: BaseSchedulerNode, node2: BaseSchedulerNode ) -> bool: From 56639104721219912e1cd66ef68e9a85322f4887 Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Sat, 13 Sep 2025 04:16:48 +0000 Subject: [PATCH 213/693] [vllm hash update] update the pinned vllm hash (#162751) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned vllm hash. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162751 Approved by: https://github.com/pytorchbot --- .github/ci_commit_pins/vllm.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ci_commit_pins/vllm.txt b/.github/ci_commit_pins/vllm.txt index 450e7ae4d2adb..c027f16bad9f1 100644 --- a/.github/ci_commit_pins/vllm.txt +++ b/.github/ci_commit_pins/vllm.txt @@ -1 +1 @@ -f510715882304796a96e33028b4f6de1b026c2c7 +4fdd6f5cbf877de7c4de33086fe41bb0ac1d3cf3 From f0fcf436c5f672f7e841fdd5943ea0d8c41543d0 Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Sat, 13 Sep 2025 04:17:17 +0000 Subject: [PATCH 214/693] [audio hash update] update the pinned audio hash (#162864) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned audio hash. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162864 Approved by: https://github.com/pytorchbot --- .github/ci_commit_pins/audio.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ci_commit_pins/audio.txt b/.github/ci_commit_pins/audio.txt index f18d293fe6285..530491f7e2feb 100644 --- a/.github/ci_commit_pins/audio.txt +++ b/.github/ci_commit_pins/audio.txt @@ -1 +1 @@ -caba63f0fa29ef9e3d566699f32f11c07c8bda4e +0e72a50dd8818d28e363d39de806e0f54d719a45 From aa41d3e49cbfef8117693ab80ec1ad57accfcb41 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Fri, 12 Sep 2025 09:59:27 -0400 Subject: [PATCH 215/693] Claude loves making these files in top level, ignore them for sanity. 
(#162806) Signed-off-by: Edward Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162806 Approved by: https://github.com/albanD --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d1fa4cd3caf28..2dd40f8cfa853 100644 --- a/.gitignore +++ b/.gitignore @@ -389,3 +389,5 @@ android/pytorch_android_torchvision/.cxx # Claude Code local configuration CLAUDE.local.md +/test_*.py +/debug_*.py From 31040b6357e2e6d3d3ff5de08ddd7cecf28766cc Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Sat, 13 Sep 2025 07:22:14 +0000 Subject: [PATCH 216/693] Revert "port some distributed tensor test files for Intel GPU (#161703)" This reverts commit 179f10621b418427fc6e92f58ea2b0bbe4cc9c52. Reverted https://github.com/pytorch/pytorch/pull/161703 on behalf of https://github.com/huydhn due to Sorry for reverting your change but these tests are failing internally ([comment](https://github.com/pytorch/pytorch/pull/161703#issuecomment-3287720713)) --- .../tensor/debug/test_comm_mode.py | 11 ++++--- test/distributed/tensor/test_dtensor.py | 14 ++++----- .../tensor/test_dtensor_compile.py | 30 +++++++++---------- test/distributed/tensor/test_redistribute.py | 4 ++- test/distributed/tensor/test_tensor_ops.py | 2 +- .../distributed/_tensor/common_dtensor.py | 2 +- 6 files changed, 31 insertions(+), 32 deletions(-) diff --git a/test/distributed/tensor/debug/test_comm_mode.py b/test/distributed/tensor/debug/test_comm_mode.py index ca2d6b7c77ac1..c87164750c684 100644 --- a/test/distributed/tensor/debug/test_comm_mode.py +++ b/test/distributed/tensor/debug/test_comm_mode.py @@ -6,7 +6,7 @@ import torch.nn as nn from torch.distributed.tensor import DeviceMesh, DTensor, Shard from torch.distributed.tensor.debug import CommDebugMode -from torch.testing._internal.common_distributed import requires_accelerator_dist_backend +from torch.testing._internal.common_distributed import requires_nccl from torch.testing._internal.common_utils import run_tests, TestCase from torch.testing._internal.distributed._tensor.common_dtensor import MLPModule from torch.testing._internal.distributed.fake_pg import FakeStore @@ -14,7 +14,6 @@ c10d_functional = torch.ops.c10d_functional c10d_ops = torch.ops.c10d -device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" class TestCommMode(TestCase): @@ -29,7 +28,7 @@ def setUp(self): dist.init_process_group( backend="fake", rank=1, world_size=self.world_size, store=store ) - self.device_type = device_type + self.device_type = "cuda" if torch.cuda.is_available() else "cpu" self.world_pg = dist.distributed_c10d._get_default_group() def checksAssert(self, comm_mode, key, expected_value, expected_total_value): @@ -112,12 +111,12 @@ def f(x, y): self.assertEqual(comm_counts[c10d_functional.all_gather_into_tensor], 1) self.assertEqual(comm_counts[c10d_functional.reduce_scatter_tensor], 0) - @requires_accelerator_dist_backend(["nccl", "xccl"]) + @requires_nccl() def test_comm_mode_with_c10d(self): - if not torch.accelerator.is_available(): + if not torch.cuda.is_available(): return - inp = torch.rand(2, 8, 16).to(device_type) + inp = torch.rand(2, 8, 16).cuda() all_gather_out = inp.new_empty(self.world_size * 2, 8, 16) comm_mode = CommDebugMode() diff --git a/test/distributed/tensor/test_dtensor.py b/test/distributed/tensor/test_dtensor.py index 19225864e67d6..083f6d459c7e0 100644 --- a/test/distributed/tensor/test_dtensor.py +++ b/test/distributed/tensor/test_dtensor.py @@ -616,11 +616,11 @@ def 
sub_mesh_assert_equal(self, mesh, exp_in_mesh, exp_out_of_mesh, tensor): @with_comms def test_dtensor_device_mesh_device_conversion(self): - # construct a gpu device mesh + # construct a cuda device mesh mesh = self.build_device_mesh() - # construct from a cpu local tensor with gpu device mesh - # should automatically convert the dist tensor to gpu + # construct from a cpu local tensor with cuda device mesh + # should automatically convert the dist tensor to cuda placements = [Shard(0)] local_tensor = torch.randn(3, 3) dist_tensor = DTensor.from_local(local_tensor, mesh, placements) @@ -669,7 +669,7 @@ def test_dtensor_api_device_mesh_context_manager(self): @with_comms def test_dtensor_2d_mesh(self): mesh_tensor = torch.arange(self.world_size).reshape(2, 4) - # construct a gpu device mesh + # construct a cuda device mesh mesh = DeviceMesh(self.device_type, mesh_tensor) # construct a dist tensor on 2d device mesh and test if works @@ -691,7 +691,7 @@ def test_dtensor_2d_mesh(self): @with_comms def test_device_mesh_nd(self): - # construct a gpu device mesh + # construct a cuda device mesh mesh_tensor = torch.arange(self.world_size).reshape(2, 2, 2) mesh = DeviceMesh(self.device_type, mesh_tensor) # construct a dist tensor on 3d device mesh and test if works @@ -953,8 +953,8 @@ def _create_tensor(self, size): # Keep everything deterministic. torch.manual_seed(0) tensor = torch.rand(size) - if self.device_type != "cpu": - return tensor.to(self.device_type) + if self.device_type == "cuda": + return tensor.cuda() else: return tensor diff --git a/test/distributed/tensor/test_dtensor_compile.py b/test/distributed/tensor/test_dtensor_compile.py index 4a94f50362349..fa4c217716b2c 100644 --- a/test/distributed/tensor/test_dtensor_compile.py +++ b/test/distributed/tensor/test_dtensor_compile.py @@ -39,7 +39,6 @@ RowwiseParallel, ) from torch.distributed.tensor.placement_types import _StridedShard -from torch.testing._internal.common_device_type import skipXPUIf from torch.testing._internal.common_distributed import skip_if_lt_x_gpu from torch.testing._internal.common_fsdp import get_devtype from torch.testing._internal.common_utils import ( @@ -48,6 +47,8 @@ run_tests, skipIfHpu, skipIfTorchDynamo, + TEST_CUDA, + TEST_HPU, ) from torch.testing._internal.distributed._tensor.common_dtensor import ( DTensorTestBase, @@ -94,8 +95,6 @@ def extract_graph(fx_g, _, graph_cell): partition_fn=min_cut_rematerialization_partition, ) -device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" - def _apply_sharding(mod: nn.Module, shard_dim: int, device_mesh: DeviceMesh): """ @@ -142,7 +141,7 @@ def tearDown(self): @property def device_type(self) -> str: - return device_type + return "cuda" if TEST_CUDA else "hpu" if TEST_HPU else "cpu" @property def world_size(self) -> int: @@ -161,9 +160,9 @@ def fn(x): res = fn(x) res.to_local().sum().backward() - @unittest.skipIf(not torch.accelerator.is_available(), "accelerator not available") + @unittest.skipIf(not TEST_CUDA, "CUDA not available") def test_dtensor_basic_export(self): - mesh = DeviceMesh(self.device_type, torch.arange(self.world_size)) + mesh = DeviceMesh("cuda", torch.arange(self.world_size)) param = torch.randn(4, 4) param_x = DTensor.from_local(param, mesh, [Shard(0)], run_check=False) @@ -189,10 +188,10 @@ def forward(self, x): ) self.assertExpectedInline( str(ep.graph_module.code).strip(), - f"""\ + """\ def forward(self, b_buffer, x): _assert_tensor_metadata_default = torch.ops.aten._assert_tensor_metadata.default(x, dtype = 
torch.float64, device = device(type='cpu'), layout = torch.strided); _assert_tensor_metadata_default = None - to = torch.ops.aten.to.dtype_layout(x, dtype = torch.float64, layout = torch.strided, device = device(type='{self.device_type}')); x = None + to = torch.ops.aten.to.dtype_layout(x, dtype = torch.float64, layout = torch.strided, device = device(type='cuda')); x = None view_as = torch.ops.aten.view_as.default(to, to); to = None dtensor___init__0 = self.dtensor___init__0 dtensor_const_func_spec0 = self.dtensor_const_func_spec0 @@ -207,10 +206,10 @@ def forward(self, b_buffer, x): # add is performed in _propagate_tensor_meta_non_cached, hence add_1 instead of add self.assertExpectedInline( str(ep.run_decompositions({}).graph_module.code).strip(), - f"""\ + """\ def forward(self, b_parametrizations_buffer_original0, x): _assert_tensor_metadata = torch.ops.aten._assert_tensor_metadata.default(x, None, None, torch.float64, device = device(type='cpu'), layout = torch.strided); _assert_tensor_metadata = None - _to_copy = torch.ops.aten._to_copy.default(x, dtype = torch.float64, layout = torch.strided, device = device(type='{self.device_type}', index=0)); x = None + _to_copy = torch.ops.aten._to_copy.default(x, dtype = torch.float64, layout = torch.strided, device = device(type='cuda', index=0)); x = None view = torch.ops.aten.view.default(_to_copy, [4, 4]); _to_copy = None add_1 = torch.ops.aten.add.Tensor(b_parametrizations_buffer_original0, view); b_parametrizations_buffer_original0 = view = None view_1 = torch.ops.aten.view.default(add_1, [4, 4]); add_1 = None @@ -340,7 +339,6 @@ def fn(x): self.assertEqual(res, ref) @skipIfHpu - @skipXPUIf(True, "https://github.com/intel/torch-xpu-ops/issues/1981") def test_dtensor_dynamic_loss_parallel_log_softmax(self): mesh = DeviceMesh(self.device_type, torch.arange(self.world_size)) @@ -716,13 +714,13 @@ def fn(x, y, z): out = layer_norm.permute(0, 2, 1) return out - x = torch.randn(4, 2, 4, requires_grad=True, device=self.device_type) + x = torch.randn(4, 2, 4, requires_grad=True, device="cuda") x_dt = DTensor.from_local(x, mesh, [Shard(1)], run_check=False) - y = torch.randn(4, requires_grad=True, device=self.device_type) + y = torch.randn(4, requires_grad=True, device="cuda") y_dt = DTensor.from_local(y, mesh, [Replicate()], run_check=False) - z = torch.randn(4, requires_grad=True, device=self.device_type) + z = torch.randn(4, requires_grad=True, device="cuda") z_dt = DTensor.from_local(z, mesh, [Replicate()], run_check=False) opt_fn = torch.compile(fn, backend="inductor", fullgraph=True) @@ -820,7 +818,7 @@ def test_dtensor_dynamo_device_mesh_attrs(self): # pass in tensor as inputs/outputs, create DTensor and run redistribute # (allgather collective) inside the fn def fn(x_dt): - if x_dt.device_mesh.device_type == f"{self.device_type}": + if x_dt.device_mesh.device_type == "cuda": return x_dt + 1 else: return x_dt + 2 @@ -949,7 +947,7 @@ def forward(self, input): model = FakeTransformer().to(self.device_type) - tp_mesh = init_device_mesh(self.device_type, (2,), mesh_dim_names=("tp",)) + tp_mesh = init_device_mesh("cuda", (2,), mesh_dim_names=("tp",)) # apply sequence parallel parallel_plan = { diff --git a/test/distributed/tensor/test_redistribute.py b/test/distributed/tensor/test_redistribute.py index b385b92f960e6..fe07b0dd6a241 100644 --- a/test/distributed/tensor/test_redistribute.py +++ b/test/distributed/tensor/test_redistribute.py @@ -19,6 +19,8 @@ instantiate_parametrized_tests, parametrize, run_tests, + TEST_CUDA, + TEST_HPU, ) from 
torch.testing._internal.distributed._tensor.common_dtensor import ( DTensorTestBase, @@ -517,7 +519,7 @@ def test_redistribute_shard_dim_change(self, dtype): local_out_dt = out_dt.to_local() local_expected_dt = expected_dt.to_local() self.assertEqual(out_dt.to_local(), expected_dt.to_local()) - if torch.accelerator.is_available(): + if TEST_HPU or TEST_CUDA: self.assertEqual( comm_mode.get_comm_counts()[ torch.ops._dtensor.shard_dim_alltoall diff --git a/test/distributed/tensor/test_tensor_ops.py b/test/distributed/tensor/test_tensor_ops.py index 1e117353da28d..eaa1969068c1f 100644 --- a/test/distributed/tensor/test_tensor_ops.py +++ b/test/distributed/tensor/test_tensor_ops.py @@ -295,8 +295,8 @@ def test_zeros_like(self): self.assertEqual(dist_tensor.dtype, torch.float32) self.assertEqual(zeros_like_dt.dtype, torch.bfloat16) - @skip_if_lt_x_gpu(4) @with_comms + @skip_if_lt_x_gpu(4) def test_stack(self): mesh_2d = DeviceMesh( self.device_type, torch.arange(self.world_size).reshape(2, 2) diff --git a/torch/testing/_internal/distributed/_tensor/common_dtensor.py b/torch/testing/_internal/distributed/_tensor/common_dtensor.py index 92a232fd9b0db..e25e08fbf5090 100644 --- a/torch/testing/_internal/distributed/_tensor/common_dtensor.py +++ b/torch/testing/_internal/distributed/_tensor/common_dtensor.py @@ -367,7 +367,7 @@ def device_type(self) -> str: @property def backend(self) -> str: - backend = dist.get_default_backend_for_device(self.device_type) + backend = dist.get_default_backend_for_device(DEVICE_TYPE) return backend def build_device_mesh(self) -> DeviceMesh: From 9c93dc8123005c75494a4c3cd46532a75820da70 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Sat, 13 Sep 2025 07:42:12 +0000 Subject: [PATCH 217/693] Revert "Return NoOpDeviceGuardImpl in replace of CudaDeviceGuard when device is not available, or cpu-only build (#160532)" This reverts commit a956c4ab1cb13079203a8f07eb26218724f54dc8. Reverted https://github.com/pytorch/pytorch/pull/160532 on behalf of https://github.com/huydhn due to Reverted internally ([comment](https://github.com/pytorch/pytorch/pull/160532#issuecomment-3287745165)) --- c10/core/impl/DeviceGuardImplInterface.cpp | 24 ---------- c10/core/impl/DeviceGuardImplInterface.h | 5 +- test/export/test_export_opinfo.py | 56 ++++------------------ torch/_C/__init__.pyi.in | 1 - torch/_subclasses/fake_tensor.py | 7 --- torch/csrc/Module.cpp | 18 +------ 6 files changed, 11 insertions(+), 100 deletions(-) diff --git a/c10/core/impl/DeviceGuardImplInterface.cpp b/c10/core/impl/DeviceGuardImplInterface.cpp index 1fb78aa443e3f..015bcd3e64fb3 100644 --- a/c10/core/impl/DeviceGuardImplInterface.cpp +++ b/c10/core/impl/DeviceGuardImplInterface.cpp @@ -1,5 +1,4 @@ #include -#include #include namespace c10::impl { @@ -15,27 +14,4 @@ DeviceGuardImplRegistrar::DeviceGuardImplRegistrar( device_guard_impl_registry[static_cast(type)].store(impl); } -namespace { -thread_local std::unique_ptr tls_fake_device_guard = - nullptr; -} - -void ensureCUDADeviceGuardSet() { - constexpr auto cuda_idx = static_cast(DeviceType::CUDA); - - const DeviceGuardImplInterface* p = - device_guard_impl_registry[cuda_idx].load(); - - // A non-null `ptr` indicates that CUDA is already available. - if (p == nullptr || (p && p->deviceCount() == 0)) { - // In following cases, we override CUDA guard interface with a no-op - // device guard. - // 1. p == nullptr; Trying to get a cuda device guard on a cpu-only build. - // 2. p->deviceCount() == 0; cuda build enabled, but no cuda devices - // available. 
- tls_fake_device_guard = std::make_unique>(); - device_guard_impl_registry[cuda_idx].store(tls_fake_device_guard.get()); - } -} - } // namespace c10::impl diff --git a/c10/core/impl/DeviceGuardImplInterface.h b/c10/core/impl/DeviceGuardImplInterface.h index fc8c367f75e8c..523e9ad9f45fa 100644 --- a/c10/core/impl/DeviceGuardImplInterface.h +++ b/c10/core/impl/DeviceGuardImplInterface.h @@ -6,7 +6,6 @@ #include // Just for C10_ANONYMOUS_VARIABLE -#include #include #include @@ -252,7 +251,7 @@ struct C10_API DeviceGuardImplInterface { // for devices that don't actually have a concept of device index. Prominent // examples are CPU and Meta. template -struct NoOpDeviceGuardImpl : public DeviceGuardImplInterface { +struct NoOpDeviceGuardImpl final : public DeviceGuardImplInterface { NoOpDeviceGuardImpl() = default; DeviceType type() const override { return D; @@ -372,7 +371,5 @@ inline bool hasDeviceGuardImpl(DeviceType type) { return device_guard_impl_registry[static_cast(type)].load(); } -void C10_API ensureCUDADeviceGuardSet(); - } // namespace impl } // namespace c10 diff --git a/test/export/test_export_opinfo.py b/test/export/test_export_opinfo.py index 24e2f71ff4354..35d8b2895bd83 100644 --- a/test/export/test_export_opinfo.py +++ b/test/export/test_export_opinfo.py @@ -3,7 +3,6 @@ # flake8: noqa import itertools -import unittest import torch from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode @@ -51,11 +50,17 @@ xfail("masked.std"), xfail("masked.sum"), xfail("masked.var"), + xfail("nn.functional.grid_sample"), xfail("to_sparse"), # cannot xfail as it is passing for cpu-only build - skip("nn.functional.grid_sample"), skip("nn.functional.conv2d"), skip("nn.functional.scaled_dot_product_attention"), + # following are failing due to OptionalDeviceGuard + xfail("__getitem__"), + xfail("nn.functional.batch_norm"), + xfail("nn.functional.instance_norm"), + xfail("nn.functional.multi_margin_loss"), + xfail("nonzero"), } fake_decomposition_failures = { @@ -123,52 +128,9 @@ class TestExportOpInfo(TestCase): def test_fake_export(self, device, dtype, op): _test_export_helper(self, dtype, op) - @unittest.skipIf(not torch.backends.cuda.is_built(), "requires CUDA build") - def test_preserve_original_behavior(self): - def cuda_calls_behavior_unchanged(): - cpu_x = torch.randn(2) - with self.assertRaisesRegex( - RuntimeError, "Found no NVIDIA driver on your system." - ): - cuda_x = cpu_x.to("cuda") - - with self.assertRaisesRegex( - RuntimeError, "Found no NVIDIA driver on your system." - ): - torch.randn(2, device="cuda") - - with self.assertRaisesRegex( - RuntimeError, "Found no NVIDIA driver on your system." - ): - torch.cuda.get_device_capability() - - with self.assertRaisesRegex( - RuntimeError, "Found no NVIDIA driver on your system." - ): - torch.cuda.set_device(1) - - with self.assertRaisesRegex( - RuntimeError, "Found no NVIDIA driver on your system." 
- ): - torch.cuda.current_device() - - self.assertEqual(torch.cuda.is_available(), False) - self.assertEqual(torch.cuda.device_count(), 0) - - cuda_calls_behavior_unchanged() - - cpu_x = torch.randn(2) - with FakeTensorMode(allow_non_fake_inputs=True) as mode: - cuda_x = mode.from_tensor(cpu_x) - cuda_x.fake_device = torch.device("cuda") - cuda_y = cuda_x + cuda_x - self.assertEqual(cuda_y.device.type, "cuda") - - # should fail again after exiting the fake mode, with the identical error message - cuda_calls_behavior_unchanged() - -instantiate_device_type_tests(TestExportOpInfo, globals(), only_for="cpu") +only_for = "cpu" +instantiate_device_type_tests(TestExportOpInfo, globals(), only_for=only_for) if __name__ == "__main__": diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in index 83cacaf69dec8..e55137c3d2bfd 100644 --- a/torch/_C/__init__.pyi.in +++ b/torch/_C/__init__.pyi.in @@ -1379,7 +1379,6 @@ def _get_linalg_preferred_backend() -> _LinalgBackend: ... def _set_linalg_preferred_backend(arg: _LinalgBackend): ... def _get_fp32_precision_getter(backend: str, op: str) -> str: ... def _set_fp32_precision_setter(backend: str, op: str, value: str) -> str: ... -def _ensureCUDADeviceGuardSet() -> None: ... class _LinalgBackend: Default: _LinalgBackend diff --git a/torch/_subclasses/fake_tensor.py b/torch/_subclasses/fake_tensor.py index 6b55abcef00cd..5767f6a1d0c1e 100644 --- a/torch/_subclasses/fake_tensor.py +++ b/torch/_subclasses/fake_tensor.py @@ -1387,12 +1387,6 @@ def __enter__(self) -> Self: # See NOTE: [torch.tensor, lift_fresh, and device movement] prev_only_lift_cpu_tensors = torch._C._only_lift_cpu_tensors() torch._C._set_only_lift_cpu_tensors(True) - - # In the case of CPU-only build or cuda device unavailable, - # we patch the cuda device guard to use NoOpDeviceGuardImpl. - # This enables us to trace over cuda kernels under FakeTensorMode. 
- torch._C._ensureCUDADeviceGuardSet() - maybe_prev_fake_mode = torch._C._unset_dispatch_mode(self._mode_key) if self is not maybe_prev_fake_mode: self.enter_stack.append( @@ -1403,7 +1397,6 @@ def __enter__(self) -> Self: # no-op (still need to re-set the fake mode though since we unset it) torch._C._set_dispatch_mode(self) self.enter_stack.append((False, None, prev_only_lift_cpu_tensors)) - return self def __exit__( diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index d040e16ba5283..ac2b03d2651cc 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -1551,15 +1550,6 @@ static PyObject* THPModule_are_vmap_fallback_warnings_enabled( END_HANDLE_TH_ERRORS } -static PyObject* THCPModule_ensureCUDADeviceGuardSet( - PyObject* self, - PyObject* noargs) { - HANDLE_TH_ERRORS - c10::impl::ensureCUDADeviceGuardSet(); - Py_RETURN_NONE; - END_HANDLE_TH_ERRORS -} - static std::initializer_list TorchMethods = { {"_initExtension", THPModule_initExtension, METH_O, nullptr}, {"_autograd_init", THPAutograd_initExtension, METH_NOARGS, nullptr}, @@ -1855,13 +1845,7 @@ static std::initializer_list TorchMethods = { (PyCFunction)(void (*)())THPModule_has_torch_function_variadic, METH_FASTCALL, nullptr}, - {"_ensureCUDADeviceGuardSet", - THCPModule_ensureCUDADeviceGuardSet, - METH_NOARGS, - nullptr}, - {nullptr, nullptr, 0, nullptr} - -}; + {nullptr, nullptr, 0, nullptr}}; #ifdef USE_CUDA // NOLINTBEGIN(misc-use-internal-linkage) From deb7ebe0a399b982ee48001713320a0bac792261 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Sat, 13 Sep 2025 07:52:50 +0000 Subject: [PATCH 218/693] Revert "[Reland] Use std::string_view in torchgen (#158625)" This reverts commit 972e409829343cc2062aeee0994a9c1c735d216a. Reverted https://github.com/pytorch/pytorch/pull/158625 on behalf of https://github.com/huydhn due to Sorry for reverting your change but it seems to break a couple of ExecuTorch tests for Vulkan backend ([comment](https://github.com/pytorch/pytorch/pull/158625#issuecomment-3287754275)) --- test/test_overrides.py | 2 ++ tools/autograd/load_derivatives.py | 2 +- torch/csrc/utils/python_arg_parser.cpp | 1 + torchgen/api/python.py | 2 +- torchgen/api/types/types.py | 2 +- torchgen/api/types/types_base.py | 2 -- torchgen/dest/lazy_ir.py | 6 +++--- torchgen/static_runtime/generator.py | 3 ++- 8 files changed, 11 insertions(+), 9 deletions(-) diff --git a/test/test_overrides.py b/test/test_overrides.py index 2a4f244bad11a..8454677856d0f 100644 --- a/test/test_overrides.py +++ b/test/test_overrides.py @@ -940,6 +940,8 @@ def _simple_type_parser(func, arg_name, arg_type): return None elif arg_type == "ScalarType": return torch.float32 + elif arg_type == "c10::string_view": + return "" elif arg_type in ("std::string_view", "::std::string_view"): return "" elif arg_type == "SymInt": diff --git a/tools/autograd/load_derivatives.py b/tools/autograd/load_derivatives.py index 28fddf0fb8513..f61226f25fb90 100644 --- a/tools/autograd/load_derivatives.py +++ b/tools/autograd/load_derivatives.py @@ -969,7 +969,7 @@ def repl(m: re.Match[str]) -> str: if nctype.type == OptionalCType(BaseCType(stringT)): formula = re.sub( rf"\b{name}\b", - f"{name}.has_value() ? std::optional<::std::string_view>({name}.value()) : std::nullopt", + f"{name}.has_value() ? 
std::optional({name}.value()) : std::nullopt", formula, ) diff --git a/torch/csrc/utils/python_arg_parser.cpp b/torch/csrc/utils/python_arg_parser.cpp index d801c7f730b01..613657e03b926 100644 --- a/torch/csrc/utils/python_arg_parser.cpp +++ b/torch/csrc/utils/python_arg_parser.cpp @@ -46,6 +46,7 @@ static std::unordered_map type_map = { {"DeviceIndex", ParameterType::INT64}, {"Stream", ParameterType::STREAM}, {"std::string", ParameterType::STRING}, + {"c10::string_view", ParameterType::STRING}, {"std::string_view", ParameterType::STRING}, {"::std::string_view", ParameterType::STRING}, {"Dimname", ParameterType::DIMNAME}, diff --git a/torchgen/api/python.py b/torchgen/api/python.py index 0c5b9ad5e7b4d..dbfa730601630 100644 --- a/torchgen/api/python.py +++ b/torchgen/api/python.py @@ -683,7 +683,7 @@ def argument_type_str( elif t.name == BaseTy.float: return "double" elif t.name == BaseTy.str: - return "std::string_view" + return "c10::string_view" elif t.name in [ BaseTy.Tensor, BaseTy.bool, diff --git a/torchgen/api/types/types.py b/torchgen/api/types/types.py index 97724384c2a20..41c05653fffdf 100644 --- a/torchgen/api/types/types.py +++ b/torchgen/api/types/types.py @@ -52,7 +52,7 @@ float8_e4m3fnT = BaseCppType("at", "Float8_e4m3fn") float8_e4m3fnuzT = BaseCppType("at", "Float8_e4m3fnuz") float8_e8m0fnuT = BaseCppType("at", "Float8_e8m0fnu") -stringT = BaseCppType("std", "string_view") +stringT = BaseCppType("c10", "string_view") generatorT = BaseCppType("at", "Generator") scalarTypeT = BaseCppType("at", "ScalarType") tensorT = BaseCppType("at", "Tensor") diff --git a/torchgen/api/types/types_base.py b/torchgen/api/types/types_base.py index 2288ebce71835..08085fa0fa2bf 100644 --- a/torchgen/api/types/types_base.py +++ b/torchgen/api/types/types_base.py @@ -81,8 +81,6 @@ class BaseCType(CType): type: BaseCppType def cpp_type(self, *, strip_ref: bool = False) -> str: - if self.type.ns == "std": - return "::" + str(self.type) return str(self.type) def remove_const_ref(self) -> CType: diff --git a/torchgen/dest/lazy_ir.py b/torchgen/dest/lazy_ir.py index 6231a36d5d460..b912b8f2427f8 100644 --- a/torchgen/dest/lazy_ir.py +++ b/torchgen/dest/lazy_ir.py @@ -256,7 +256,7 @@ def gen(self, schema: LazyIrSchema) -> list[str]: [ # This code is just special casing the mapping from string_view -> strings f"{a.name}({a.name}.has_value() ? 
::std::make_optional(std::string(*{a.name})) : ::std::nullopt)" - if a.lazy_type.cpp_type() == "::std::optional<::std::string_view>" + if a.lazy_type.cpp_type() == "::std::optional" else f"{a.name}({a.name})" for a in scalar_args ] @@ -266,9 +266,9 @@ def gen(self, schema: LazyIrSchema) -> list[str]: scalar_decls = "\n ".join( [ f"std::string {a.name};" - if a.lazy_type.cpp_type() == "::std::string_view" + if a.lazy_type.cpp_type() == "c10::string_view" else f"::std::optional {a.name};" - if a.lazy_type.cpp_type() == "::std::optional<::std::string_view>" + if a.lazy_type.cpp_type() == "::std::optional" else f"{a.lazy_type.cpp_type()} {a.name};" for a in scalar_args ] diff --git a/torchgen/static_runtime/generator.py b/torchgen/static_runtime/generator.py index a9814bd4dee1f..8ad2fd3c45889 100644 --- a/torchgen/static_runtime/generator.py +++ b/torchgen/static_runtime/generator.py @@ -323,7 +323,8 @@ def ivalue_type_conversion_method( ), BaseTy.str: ( (False, "toStringView()"), - (False, "toOptional()"), + (False, "toOptional()"), + (False, "toOptional<::std::string_view>()"), ), } From 5b9114bf197d1db32cc1cfe06f66e2b4a02f55b1 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Sat, 13 Sep 2025 15:43:50 +0000 Subject: [PATCH 219/693] Revert "[ROCm/Windows] Support aotriton for scaled_dot_product_attention on Windows. (#162330)" This reverts commit 62843c14bbf694f5722fd6e1075da4792507fe42. Reverted https://github.com/pytorch/pytorch/pull/162330 on behalf of https://github.com/atalman due to Sorry reverting looks like broke windows nightlies see https://github.com/pytorch/pytorch/issues/162881 ([comment](https://github.com/pytorch/pytorch/pull/162330#issuecomment-3288544921)) --- CMakeLists.txt | 4 +- .../native/transformers/cuda/attention.cu | 66 ---------- .../transformers/hip/flash_attn/flash_api.h | 39 +++++- cmake/External/aotriton.cmake | 113 +----------------- tools/linter/dictionary.txt | 1 - 5 files changed, 44 insertions(+), 179 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f3e4b28bcff98..9b0e87b108e7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -874,7 +874,7 @@ cmake_dependent_option( "Whether to build the flash_attention kernel for scaled dot product attention.\ Will be disabled if not supported by the platform" ON - "USE_CUDA OR USE_ROCM" + "USE_CUDA OR USE_ROCM;NOT MSVC" OFF) cmake_dependent_option( @@ -909,7 +909,7 @@ cmake_dependent_option( # USE_FLASH_ATTENTION -> USE_ROCM -> Dependencies.cmake -> aotriton.cmake # if(USE_ROCM) - if(USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION) + if(UNIX AND (USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION)) include(cmake/External/aotriton.cmake) endif() endif() diff --git a/aten/src/ATen/native/transformers/cuda/attention.cu b/aten/src/ATen/native/transformers/cuda/attention.cu index c2193f2378dd5..b8b43e0086c1a 100644 --- a/aten/src/ATen/native/transformers/cuda/attention.cu +++ b/aten/src/ATen/native/transformers/cuda/attention.cu @@ -95,72 +95,6 @@ #endif #endif -#if defined(USE_ROCM) && (defined(USE_FLASH_ATTENTION) || defined(USE_MEM_EFF_ATTENTION)) -namespace pytorch_flash -{ -std::tuple< - at::Tensor, - at::Tensor, - at::Tensor, - at::Tensor, - at::Tensor, - at::Tensor, - at::Tensor, - at::Tensor> -mha_fwd( - const at::Tensor& q, // batch_size x seqlen_q x num_heads x head_size - const at::Tensor& k, // batch_size x seqlen_k x num_heads_k x head_size - const at::Tensor& v, // batch_size x seqlen_k x num_heads_k x head_size - std::optional& - out_, // batch_size x seqlen_q x num_heads x head_size - 
std::optional& - alibi_slopes_, // num_heads or batch_size x num_heads - const float p_dropout, - const float softmax_scale, - bool is_causal, - std::optional window_size_left, - std::optional window_size_right, - const float softcap, - const bool return_softmax, - std::optional gen_) { -#if defined(USE_ROCM_CK_SDPA) - if (at::globalContext().getROCmFAPreferredBackend() == - at::ROCmFABackend::Ck) { - const int non_null_window_left = window_size_left.value_or(-1); - const int non_null_window_right = window_size_right.value_or(-1); - std::optional dummy_attn_bias = std::nullopt; - return mha_fwd_ck( - q, - k, - v, - out_, - p_dropout, - softmax_scale, - is_causal, - non_null_window_left, - non_null_window_right, - return_softmax, - gen_, - dummy_attn_bias); // Not used in flash attention - } -#endif - return mha_fwd_aot( - q, - k, - v, - out_, - alibi_slopes_, - p_dropout, - softmax_scale, - is_causal, - window_size_left, - window_size_right, - return_softmax, - gen_); -} -} -#endif - namespace at { namespace cuda::philox { diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h b/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h index 71a1959065970..f6f2240d4f091 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h +++ b/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h @@ -270,7 +270,7 @@ std::tuple mha_varle #endif TORCH_API -std::tuple< +inline std::tuple< at::Tensor, at::Tensor, at::Tensor, @@ -294,7 +294,42 @@ mha_fwd( std::optional window_size_right, const float softcap, const bool return_softmax, - std::optional gen_); + std::optional gen_) { +#if defined(USE_ROCM_CK_SDPA) + if (at::globalContext().getROCmFAPreferredBackend() == + at::ROCmFABackend::Ck) { + const int non_null_window_left = window_size_left.value_or(-1); + const int non_null_window_right = window_size_right.value_or(-1); + std::optional dummy_attn_bias = std::nullopt; + return mha_fwd_ck( + q, + k, + v, + out_, + p_dropout, + softmax_scale, + is_causal, + non_null_window_left, + non_null_window_right, + return_softmax, + gen_, + dummy_attn_bias); // Not used in flash attention + } +#endif + return mha_fwd_aot( + q, + k, + v, + out_, + alibi_slopes_, + p_dropout, + softmax_scale, + is_causal, + window_size_left, + window_size_right, + return_softmax, + gen_); +} inline std::tuple< at::Tensor, diff --git a/cmake/External/aotriton.cmake b/cmake/External/aotriton.cmake index 4f7a79a78bfc6..5d91587746540 100644 --- a/cmake/External/aotriton.cmake +++ b/cmake/External/aotriton.cmake @@ -45,88 +45,13 @@ if(NOT __AOTRITON_INCLUDED) ) set(__AOTRITON_BASE_URL "https://github.com/ROCm/aotriton/releases/download/") # @lint-ignore set(__AOTRITON_Z "gz") - # Set the default __AOTRITON_LIB path - set(__AOTRITON_LIB "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so") - if(WIN32) - set(__AOTRITON_LIB "${__AOTRITON_INSTALL_DIR}/lib/aotriton_v2.lib") - endif() - - function(aotriton_build_windows_dependencies dlfcn-win32_external xz_external dlfcn-win32_DIR liblzma_DIR) - # Windows-specific dependencies - build these first - if(NOT noimage) - message(FATAL_ERROR "noimage must be ON for Windows builds") - endif() - # Build dlfcn-win32 - set(__DLFCN_WIN32_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/dlfcn-win32") - set(__DLFCN_WIN32_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/dlfcn-win32-install") - - ExternalProject_Add(${dlfcn-win32_external} - GIT_REPOSITORY https://github.com/dlfcn-win32/dlfcn-win32.git - GIT_TAG v1.4.2 - PREFIX ${__DLFCN_WIN32_PREFIX} - INSTALL_DIR 
${__DLFCN_WIN32_INSTALL_DIR} - CMAKE_ARGS - -DCMAKE_INSTALL_PREFIX=${__DLFCN_WIN32_INSTALL_DIR} - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_C_COMPILER=cl - -DCMAKE_CXX_COMPILER=cl - -DBUILD_SHARED_LIBS=ON - -DBUILD_TESTS=OFF - BUILD_BYPRODUCTS - "${__DLFCN_WIN32_INSTALL_DIR}/lib/dl.lib" - "${__DLFCN_WIN32_INSTALL_DIR}/bin/dl.dll" - ) - ExternalProject_Add_Step(${dlfcn-win32_external} copy_to_aotriton - COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${__DLFCN_WIN32_INSTALL_DIR}/bin/dl.dll" - "${__AOTRITON_INSTALL_DIR}/lib/" - DEPENDEES install - ) - set(${dlfcn-win32_DIR} "${__DLFCN_WIN32_INSTALL_DIR}/share/dlfcn-win32" CACHE PATH "Path to dlfcn-win32 CMake config" FORCE) - - # Build xz/liblzma - set(__XZ_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/xz") - set(__XZ_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/xz-install") - - ExternalProject_Add(${xz_external} - GIT_REPOSITORY https://github.com/tukaani-project/xz.git - GIT_TAG v5.8.1 - PREFIX ${__XZ_PREFIX} - INSTALL_DIR ${__XZ_INSTALL_DIR} - CMAKE_ARGS - -DCMAKE_INSTALL_PREFIX=${__XZ_INSTALL_DIR} - -DCMAKE_BUILD_TYPE=Release - -DBUILD_SHARED_LIBS=ON - -DENABLE_NLS=OFF - -DXZ_TOOL_LZMAINFO=OFF - -DXZ_TOOL_XZ=OFF - -DXZ_TOOL_XZDEC=OFF - -DXZ_TOOL_LZMADEC=OFF - BUILD_BYPRODUCTS - "${__XZ_INSTALL_DIR}/lib/lzma.lib" - "${__XZ_INSTALL_DIR}/bin/liblzma.dll" - ) - ExternalProject_Add_Step(${xz_external} copy_to_aotriton - COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${__XZ_INSTALL_DIR}/bin/liblzma.dll" - "${__AOTRITON_INSTALL_DIR}/lib/" - DEPENDEES install - ) - set(${liblzma_DIR} "${__XZ_INSTALL_DIR}/lib/cmake/liblzma" CACHE PATH "Path to xz/liblzma CMake config" FORCE) - endfunction() - function(aotriton_build_from_source noimage project) if(noimage) SET(RECURSIVE "OFF") else() SET(RECURSIVE "ON") endif() - if(WIN32) - message(STATUS "Building AOTriton Windows dependencies") - aotriton_build_windows_dependencies(dlfcn-win32_external xz_external dlfcn-win32_DIR liblzma_DIR) - endif() message(STATUS "PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}") - ExternalProject_Add(${project} GIT_REPOSITORY https://github.com/ROCm/aotriton.git GIT_SUBMODULES_RECURSE ${RECURSIVE} @@ -140,19 +65,12 @@ if(NOT __AOTRITON_INCLUDED) -DAOTRITON_GPU_BUILD_TIMEOUT=0 -DAOTRITON_NO_PYTHON=ON -DAOTRITON_NOIMAGE_MODE=${noimage} - -DHIP_PLATFORM=amd - $<$:-Ddlfcn-win32_DIR=${dlfcn-win32_DIR}> - $<$:-Dliblzma_DIR=${liblzma_DIR}> - BUILD_BYPRODUCTS - "${__AOTRITON_LIB}" + BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so" USES_TERMINAL_DOWNLOAD TRUE USES_TERMINAL_CONFIGURE TRUE USES_TERMINAL_BUILD TRUE USES_TERMINAL_INSTALL TRUE ) - if(WIN32) - add_dependencies(${project} dlfcn-win32_external xz_external) - endif() endfunction() set(__AOTRITON_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR}) @@ -177,7 +95,7 @@ if(NOT __AOTRITON_INCLUDED) INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_BINARY_DIR}/aotriton_runtime" "${__AOTRITON_INSTALL_DIR}" - BUILD_BYPRODUCTS "${__AOTRITON_LIB}" + BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so" ) message(STATUS "Using AOTriton Runtime from pre-compiled binary ${__AOTRITON_URL}.\ Set env variables AOTRITON_INSTALL_FROM_SOURCE=1 to build from source.") @@ -193,35 +111,14 @@ if(NOT __AOTRITON_INCLUDED) string(CONCAT __AOTRITON_URL "${__AOTRITON_BASE_URL}" "${__AOTRITON_VER}/${__AOTRITON_FILE}") - - # Set up directories - set(__AOTRITON_DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_download-${image}) - set(__AOTRITON_EXTRACT_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image}) - set(__AOTRITON_INSTALL_SOURCE_DIR 
${__AOTRITON_EXTRACT_DIR}) - set(__DOWNLOAD_NO_EXTRACT "") - set(__BUILD_COMMANDS "") - - # On Windows, we need custom tar extraction with UTF-8 support - if(WIN32) - set(__DOWNLOAD_NO_EXTRACT "DOWNLOAD_NO_EXTRACT;TRUE") - set(__BUILD_COMMANDS - COMMAND ${CMAKE_COMMAND} -E make_directory "${__AOTRITON_EXTRACT_DIR}" - COMMAND tar --options hdrcharset=UTF-8 -xf "${__AOTRITON_DOWNLOAD_DIR}/${__AOTRITON_FILE}" -C "${__AOTRITON_EXTRACT_DIR}" - ) - set(__AOTRITON_INSTALL_SOURCE_DIR ${__AOTRITON_EXTRACT_DIR}/aotriton) - endif() - ExternalProject_Add(${project} URL "${__AOTRITON_URL}" URL_HASH SHA256=${__AOTRITON_SHA256} - DOWNLOAD_DIR ${__AOTRITON_DOWNLOAD_DIR} - ${__DOWNLOAD_NO_EXTRACT} - SOURCE_DIR ${__AOTRITON_EXTRACT_DIR} + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image} CONFIGURE_COMMAND "" BUILD_COMMAND "" - ${__BUILD_COMMANDS} INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory - "${__AOTRITON_INSTALL_SOURCE_DIR}" + "${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image}" "${__AOTRITON_INSTALL_DIR}" BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/aotriton.images/${image}/__signature__" @@ -267,7 +164,7 @@ if(NOT __AOTRITON_INCLUDED) endforeach() endforeach() endif() - target_link_libraries(__caffe2_aotriton INTERFACE ${__AOTRITON_LIB}) + target_link_libraries(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so) target_include_directories(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/include) set(AOTRITON_FOUND TRUE) endif() # __AOTRITON_INCLUDED diff --git a/tools/linter/dictionary.txt b/tools/linter/dictionary.txt index c4a250db04836..706881a8f10f6 100644 --- a/tools/linter/dictionary.txt +++ b/tools/linter/dictionary.txt @@ -12,7 +12,6 @@ BU contiguities contiguity coo -DEPENDEES deser din dout From f37eaebed145395f88239998bbf2cd5b659b4d7a Mon Sep 17 00:00:00 2001 From: Nyakku Shigure Date: Sat, 13 Sep 2025 19:57:23 +0000 Subject: [PATCH 220/693] Add missing `tags` parameter to `custom_op` overload signatures (#162047) It appears to be an omission in #149782. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162047 Approved by: https://github.com/zou3519, https://github.com/BoyuanFeng Co-authored-by: Boyuan Feng --- torch/_library/custom_ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch/_library/custom_ops.py b/torch/_library/custom_ops.py index 251cdefe0f05d..5c5598bdb4573 100644 --- a/torch/_library/custom_ops.py +++ b/torch/_library/custom_ops.py @@ -28,6 +28,7 @@ def custom_op( mutates_args: Union[str, Iterable[str]], device_types: device_types_t = None, schema: Optional[str] = None, + tags: Optional[Sequence[_C.Tag]] = None, ) -> Callable[[Callable[..., object]], "CustomOpDef"]: ... @@ -40,6 +41,7 @@ def custom_op( mutates_args: Union[str, Iterable[str]], device_types: device_types_t = None, schema: Optional[str] = None, + tags: Optional[Sequence[_C.Tag]] = None, ) -> "CustomOpDef": ... From 72b51597820c25ea4310ec5727b5a04a665c949d Mon Sep 17 00:00:00 2001 From: Varun Patil Date: Sat, 13 Sep 2025 20:24:38 +0000 Subject: [PATCH 221/693] [flatbuffer] Fix compile error due to discarded result (#162767) Summary: One of our builds fails because the return value of fread is discarded. Explicit cast to void fixes the build. 
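For illustration only (not part of the original summary): in this build `fread` is declared with the `warn_unused_result` attribute (see the error below), so a bare call is rejected under `-Werror`. A minimal sketch of the failing pattern and the two common remedies — the diff further down takes the checking route via `TORCH_CHECK`:

```cpp
#include <cstdio>
#include <cstddef>

// Illustrative only: three variants of the same fread call.
void read_exactly(void* buf, std::size_t size, std::FILE* f) {
  // 1) Rejected under -Werror,-Wunused-result: the result is silently dropped.
  //    std::fread(buf, size, 1, f);
  // 2) Cast to void, as the summary suggests; this silences Clang's -Wunused-result.
  //    (void)std::fread(buf, size, 1, f);
  // 3) Check the result, which is what the patch below does with TORCH_CHECK.
  std::size_t nread = std::fread(buf, size, 1, f);
  if (nread != 1) {
    // handle the short read / error
  }
}
```

The compiler output from the failing build: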
```log In file included from fbcode/caffe2/torch/csrc/jit/mobile/import.cpp:15: fbcode/caffe2/torch/csrc/jit/mobile/file_format.h:156:3: error: ignoring return value of function declared with 'warn_unused_result' attribute [-Werror,-Wunused-result] 156 | fread(data.get(), size, 1, f); | ^~~~~ ~~~~~~~~~~~~~~~~~~~~~~ 1 error generated. ... BUILD FAILED Failed to build 'fbcode//caffe2:libtorch (cfg:opt-linux-x86_64-clang19-no-san-opt-by-default#fef256f7ee896871)' ``` Test Plan: No runtime behavior change. CI. Rollback Plan: Differential Revision: D82265002 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162767 Approved by: https://github.com/Skylion007 --- torch/csrc/jit/mobile/file_format.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch/csrc/jit/mobile/file_format.h b/torch/csrc/jit/mobile/file_format.h index 2156f8695a63c..814d680f83ba7 100644 --- a/torch/csrc/jit/mobile/file_format.h +++ b/torch/csrc/jit/mobile/file_format.h @@ -153,7 +153,8 @@ static inline std::tuple, size_t> get_file_content( size_t buffer_size = (size / kMaxAlignment + 1) * kMaxAlignment; std::shared_ptr data( static_cast(c10::alloc_cpu(buffer_size)), c10::free_cpu); - fread(data.get(), size, 1, f); + auto nread = fread(data.get(), size, 1, f); + TORCH_CHECK(nread == 1, "Failed to read file: ", filename); fclose(f); #endif return std::make_tuple(data, size); From 886699bc5c23105f6105d329f6ff6c0ada7b473c Mon Sep 17 00:00:00 2001 From: Ben Niu Date: Sat, 13 Sep 2025 21:01:00 +0000 Subject: [PATCH 222/693] Port shared_ptr optimization in std::shared_ptr to intrusive_ptr (#162784) Summary: Please see D21021645 for details about the optimization and why it's beneficial. A similar change has been added to libstdc++ as well, see https://github.com/gcc-mirror/gcc/commit/dbf8bd3c2f2cd2d27ca4f0fe379bd9490273c6d7 Rollback Plan: Reviewed By: yfeldblum Differential Revision: D81960754 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162784 Approved by: https://github.com/swolchok --- c10/util/intrusive_ptr.h | 64 ++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/c10/util/intrusive_ptr.h b/c10/util/intrusive_ptr.h index de81d4c1b7df3..449910cbb29e8 100644 --- a/c10/util/intrusive_ptr.h +++ b/c10/util/intrusive_ptr.h @@ -283,23 +283,55 @@ class intrusive_ptr final { } void reset_() noexcept { - if (target_ != NullType::singleton() && - detail::atomic_refcount_decrement(target_->refcount_) == 0) { - // See comment above about weakcount. As long as refcount>0, - // weakcount is one larger than the actual number of weak references. - // So we need to decrement it here. - bool should_delete = - target_->weakcount_.load(std::memory_order_acquire) == 1; - if (!should_delete) { - // justification for const_cast: release_resources is basically a - // destructor and a destructor always mutates the object, even for const - // objects. 
NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) - const_cast*>(target_)->release_resources(); - should_delete = - detail::atomic_weakcount_decrement(target_->weakcount_) == 0; + if (target_ != NullType::singleton()) { +#if defined(__linux__) && (defined(__aarch64__) || defined(__x86_64__)) + if constexpr ( + std::atomic::is_always_lock_free && + std::atomic::is_always_lock_free && + sizeof(std::atomic) == 8 && + sizeof(std::atomic) == 4) { + auto both_counts_ = + reinterpret_cast*>(&target_->refcount_); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + (reinterpret_cast(both_counts_) % + sizeof(std::atomic)) == 0 && + (reinterpret_cast(&target_->weakcount_) - + reinterpret_cast(both_counts_)) == + sizeof(std::atomic)); + // 0x100000001ULL is a 64-bit number combination of both the refcount_ + // and weakcount_ being 1. + constexpr uint64_t unique_ref_ = 0x100000001ULL; + if (both_counts_->load(std::memory_order_acquire) == unique_ref_) { + // Both counts are 1, so there are no weak references and + // we are releasing the last strong reference. No other + // threads can observe the effects of this target_ deletion + // call (e.g. calling use_count()) without a data race. + target_->refcount_.store(0, std::memory_order_relaxed); + delete target_; + return; + } } - if (should_delete) { - delete target_; +#endif + + if (detail::atomic_refcount_decrement(target_->refcount_) == 0) { + // See comment above about weakcount. As long as refcount>0, + // weakcount is one larger than the actual number of weak references. + // So we need to decrement it here. + bool should_delete = + target_->weakcount_.load(std::memory_order_acquire) == 1; + if (!should_delete) { + // justification for const_cast: release_resources is basically a + // destructor and a destructor always mutates the object, even for + // const objects. + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + const_cast*>(target_) + ->release_resources(); + should_delete = + detail::atomic_weakcount_decrement(target_->weakcount_) == 0; + } + if (should_delete) { + delete target_; + } } } } From f01bf0f64b2fb9a761d7a147f17a862561bc5baf Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Sun, 14 Sep 2025 01:30:33 +0000 Subject: [PATCH 223/693] Do not use // but use CleanDiv or FloorDiv instead (#162869) Summary: When rewriting sympy expressions in the compiler codebase we want to generate FloorDiv(a, b) CleanDiv(a, b) directly and not a//b. since the later become floor(a*pow(b, -1)) For symnodes we automatically handle that conversions in the symnode op dispatch. I will follow up with an issue to track all other usages of //. Block internal Model. Test Plan: add test run existing tests. dakechen1993 testing on the model. 
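For illustration only (not part of the original summary), a minimal sketch of the difference; it assumes `FloorDiv`/`CleanDiv` are the helpers defined in `torch.utils._sympy.functions`:

```python
# Sketch: why a // b is avoided when building sympy expressions in the compiler.
import sympy
from torch.utils._sympy.functions import CleanDiv, FloorDiv  # assumed import path

a, b = sympy.symbols("a b", integer=True, positive=True)

bad = a // b           # sympy rewrites this into floor(a*Pow(b, -1))
good = FloorDiv(a, b)  # stays a dedicated integer-division node the compiler can reason about
exact = CleanDiv(a, b) # like FloorDiv, but encodes the assumption that the division is exact
```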
Rollback Plan: Differential Revision: D82362241 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162869 Approved by: https://github.com/ezyang --- test/test_dynamic_shapes.py | 15 +++++++++++++++ torch/fx/experimental/symbolic_shapes.py | 5 ++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/test/test_dynamic_shapes.py b/test/test_dynamic_shapes.py index 7ba466119da85..0e90587822d68 100644 --- a/test/test_dynamic_shapes.py +++ b/test/test_dynamic_shapes.py @@ -3685,6 +3685,21 @@ def f(idx, x): out = torch.compile(f)(idx, x) self.assertEqual(out, f(idx, x)) + def test_trunc_int_div_true(self): + @torch.compile(backend="inductor", dynamic=True, fullgraph=True) + def f(x, s13, s57, s77): + torch._check(s13 >= 0) + torch._check(s57 >= 0) + torch._check(s77 >= 0) + if int(s13 * ((s57 // s13) + (s77 // s13)) / s13) >= 1: + return x * 2 + else: + return x * 100 + + # ensure we compile this with no errors. + x = torch.rand(10) + f(x, 4, 4096, 3920) + instantiate_parametrized_tests(TestUnbacked) diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index 5c4ed77bab919..b9a779d2e4635 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -6395,7 +6395,10 @@ def simplify(self, expr: _SympyT, size_oblivious: bool = False) -> _SympyT: if isinstance(atom.args[0], IntTrueDiv): base, divisor = atom.args[0].args if base % divisor == 0: - trunc_replacements[atom] = base // divisor + trunc_replacements[atom] = CleanDiv(base, divisor) + else: + # TruncToInt(IntTrueDiv(a,b)) == FloorDiv(a, b) + trunc_replacements[atom] = FloorDiv(base, divisor) if trunc_replacements: expr = expr.xreplace(trunc_replacements) From e74b21d66a5d18fdfb1ef08b8f329a4d80d7b9e9 Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Sun, 14 Sep 2025 04:27:32 +0000 Subject: [PATCH 224/693] [vllm hash update] update the pinned vllm hash (#162891) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned vllm hash. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162891 Approved by: https://github.com/pytorchbot --- .github/ci_commit_pins/vllm.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ci_commit_pins/vllm.txt b/.github/ci_commit_pins/vllm.txt index c027f16bad9f1..f1a4e5deb8d92 100644 --- a/.github/ci_commit_pins/vllm.txt +++ b/.github/ci_commit_pins/vllm.txt @@ -1 +1 @@ -4fdd6f5cbf877de7c4de33086fe41bb0ac1d3cf3 +973c9d01da863cac9c51e8a5c0d390fc84b84fbc From d2f6daf6a793fb94da6ec26a02ccb32daf8ef3e7 Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Sun, 14 Sep 2025 04:27:33 +0000 Subject: [PATCH 225/693] [audio hash update] update the pinned audio hash (#162892) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned audio hash. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162892 Approved by: https://github.com/pytorchbot --- .github/ci_commit_pins/audio.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ci_commit_pins/audio.txt b/.github/ci_commit_pins/audio.txt index 530491f7e2feb..05e0b684b4278 100644 --- a/.github/ci_commit_pins/audio.txt +++ b/.github/ci_commit_pins/audio.txt @@ -1 +1 @@ -0e72a50dd8818d28e363d39de806e0f54d719a45 +87ff22e49ed0e92576c4935ccb8c143daac4a3cd From 74a35c63440e0c0fc7f1e1abd9fff3297b333bea Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Sun, 14 Sep 2025 04:56:49 +0000 Subject: [PATCH 226/693] [Triton] [Inductor] Enable TMA store for TMA mm templates (#160480) Summary: Adds support for TMA store in all TMA matmul templates (notably persistent_tma including addmm and scaled_mm). This works by requiring a template be registered with `tma_store=True` and when met constructs indices/range_trees to hook into the existing code base's TMA store support. This also includes a couple notable changes: - Adds support in the TMA template support for checking the output layout. - Adds support for "hoisting" the tensor descriptor to the top of the kernel. This will currently only be used by template code right now, but in principle it can be generalized to other implementation. - Supports considering multiple indices as the "contiguous" index. This is handled with support for transposing the input data when the alignment is no longer consistent. In general since the TMA support is derived from the index it doesn't seems reasonable that the 1D index math forces a certain alignment depending on index ordering so long as the layout matches. Test Plan: Tested with test_max_autotune.py unit tests. Pull Request resolved: https://github.com/pytorch/pytorch/pull/160480 Approved by: https://github.com/NikhilAPatel --- test/inductor/test_max_autotune.py | 98 ++++++- torch/_inductor/codegen/triton.py | 178 +++++++++--- torch/_inductor/config.py | 3 + torch/_inductor/kernel/mm.py | 27 +- torch/_inductor/ops_handler.py | 2 +- torch/_inductor/select_algorithm.py | 256 ++++++++++++++++-- torch/_inductor/template_heuristics/triton.py | 1 + torch/_inductor/utils.py | 75 +++-- 8 files changed, 539 insertions(+), 101 deletions(-) diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index c040f7d1fff4b..e34b2c7edd009 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -129,11 +129,13 @@ def mm_plus_mm(a, b, c, d): @parametrize("a_transposed", (False, True)) @parametrize("b_transposed", (False, True)) @parametrize("dynamic", (False, True)) + @parametrize("tma_store", (False, True)) def test_max_autotune_regular_mm_persistent_tma( self, a_transposed: bool, b_transposed: bool, dynamic: bool, + tma_store: bool, ): def mm(a, b): # TMA requires 16-byte alignment: here we repeat the dims @@ -165,12 +167,35 @@ def mm(a, b): { "max_autotune": True, "triton.enable_persistent_tma_matmul": "1", + "triton.enable_template_tma_store": tma_store, "test_configs.autotune_choice_name_regex": "mm_persistent_tma", } ): - c_actual = torch.compile(mm, dynamic=dynamic)(a, b) + c_actual, code = run_and_get_code(torch.compile(mm, dynamic=dynamic), a, b) c_expected = mm(a, b) + if has_triton_stable_tma_api(): + make_desc_api = "triton.language.make_tensor_descriptor" + read_api = "tl.load_tensor_descriptor" + if tma_store: + # Note: The tma_descriptor0 is generated by the kernel. 
If the + # code generation process changes this could change. + write_api = "tma_descriptor0.store" + else: + write_api = "tl.store" + else: + make_desc_api = ( + "triton.language.extra.cuda.experimental_device_tensormap_create2d" + ) + read_api = "tl._experimental_descriptor_load" + # TMA store is not supported with the experimental API + write_api = "tl.store" + + # Verify that we are using a TMA implementation + FileCheck().check("triton_tem_fused_mm").check(make_desc_api).check( + read_api + ).check(write_api).run(code[0]) + torch.testing.assert_close(c_actual, c_expected, atol=1e-2, rtol=1e-2) @unittest.skipIf( @@ -264,6 +289,42 @@ def mm(a, b): # given the config flags above, we should have no choices left. self.assertIn("NoValidChoicesError", str(context.exception)) + @unittest.skipIf( + not has_triton_tma_device(), "Need device-side TMA support in Triton" + ) + @parametrize("dynamic", (False, True)) + def test_max_autotune_regular_mm_persistent_tma_illegal_output_alignment( + self, dynamic + ): + def mm(a, b, out): + torch.mm(a, b, out=out) + return out + + M, N, K = 21, 31, 32 + a = torch.empty_strided((M, K), (K, 1), dtype=torch.float16, device=GPU_TYPE) + a[:] = torch.randn((M, K), dtype=torch.float16) + b = torch.empty_strided((K, N), (1, K), dtype=torch.float16, device=GPU_TYPE) + b[:] = torch.randn((K, N), dtype=torch.float16) + # allocate an output with a stride not divisble by 16, so it can't satisfy TMA alignment checks. + out = torch.empty_strided((M, N), (N, 1), dtype=torch.float16, device=GPU_TYPE) + + with ( + self.assertRaises(BackendCompilerFailed) as context, + config.patch( + { + "max_autotune": True, + "triton.enable_persistent_tma_matmul": "1", + "triton.enable_template_tma_store": True, + "test_configs.autotune_choice_name_regex": "mm_persistent_tma", + } + ), + ): + torch.compile(mm, dynamic=dynamic)(a, b, out) + + # Lowering to the persistent+TMA Triton template should be skipped + # since the output doesn't have a stride of 1 in any dim + self.assertIn("NoValidChoicesError", str(context.exception)) + @unittest.skipIf( not has_triton_tma_device(), "Need device-side TMA support in Triton" ) @@ -317,11 +378,13 @@ def mm(a, b): @parametrize("a_transposed", (False, True)) @parametrize("b_transposed", (False, True)) @parametrize("dynamic", (False, True)) + @parametrize("tma_store", (False, True)) def test_max_autotune_addmm_persistent_tma( self, a_transposed: bool, b_transposed: bool, dynamic: bool, + tma_store: bool, ): def addmm(x, a, b): # TMA requires 16-byte alignment: here we repeat the dims @@ -355,12 +418,37 @@ def addmm(x, a, b): { "max_autotune": True, "triton.enable_persistent_tma_matmul": "1", + "triton.enable_template_tma_store": tma_store, "test_configs.autotune_choice_name_regex": "mm_persistent_tma", } ): - c_actual = torch.compile(addmm, dynamic=dynamic)(x, a, b) + c_actual, code = run_and_get_code( + torch.compile(addmm, dynamic=dynamic), x, a, b + ) c_expected = addmm(x, a, b) + if has_triton_stable_tma_api(): + make_desc_api = "triton.language.make_tensor_descriptor" + read_api = "tl.load_tensor_descriptor" + if tma_store: + # Note: The tma_descriptor0 is generated by the kernel. If the + # code generation process changes this could change. 
+ write_api = "tma_descriptor0.store" + else: + write_api = "tl.store" + else: + make_desc_api = ( + "triton.language.extra.cuda.experimental_device_tensormap_create2d" + ) + read_api = "tl._experimental_descriptor_load" + # TMA store is not supported with the experimental API + write_api = "tl.store" + + # Verify that we are using a TMA implementation + FileCheck().check("triton_tem_fused_addmm").check(make_desc_api).check( + read_api + ).check(write_api).run(code[0]) + torch.testing.assert_close(c_actual, c_expected, atol=1e-2, rtol=1e-2) @unittest.skipIf( @@ -1508,7 +1596,7 @@ def test_triton_template_generated_code_cache_key(self): # Make sure all args of generate_and_load_args are passed to make_key_args (Except generate_with_caching) # update this function each time new arg added to generate_and_load and make sure arg is added to make_key self.assertEqual(generate_and_load_args - 1, make_key_args) - self.assertEqual(generate_and_load_args, 17) + self.assertEqual(generate_and_load_args, 18) @fresh_cache() @config.patch( @@ -1594,7 +1682,7 @@ def func_test1(x, y, z, m): "[[22,30],[30,1],torch.float32,device(type='cuda',index=0),0]"], 'num_stages':1,'num_warps':2,'prefix_args':0,'suffix_args':0,'call_sizes':[10,30], 'layout':"[[10,30],[30,1],torch.float32,device(type='cuda',index=0),0]", - 'num_consumer_groups':0,'num_buffers_warp_spec':0,'epilogue_fn_hash':'identity', + 'num_consumer_groups':0,'num_buffers_warp_spec':0,'epilogue_fn_hash':'identity','tma_store':False, 'kwargs':{'EVEN_K':False,'ALLOW_TF32':True,'USE_FAST_ACCUM':False,'ACC_TYPE':'tl.float32', 'BLOCK_M':16,'BLOCK_N':32,'BLOCK_K':16,'GROUP_M':8},'hint_override':None}""" @@ -1634,7 +1722,7 @@ def func_test1(x, y, z, m): "[[s27,s94],[s94,1],torch.float32,device(type='cuda',index=0),0]"], 'num_stages':1,'num_warps':2,'prefix_args':0,'suffix_args':0,'call_sizes':[s77,s94], 'layout':"[[s77,s94],[s94,1],torch.float32,device(type='cuda',index=0),0]",'num_consumer_groups':0, - 'num_buffers_warp_spec':0,'epilogue_fn_hash':'identity','kwargs':{'EVEN_K':False,'ALLOW_TF32':True, + 'num_buffers_warp_spec':0,'epilogue_fn_hash':'identity','tma_store':False,'kwargs':{'EVEN_K':False,'ALLOW_TF32':True, 'USE_FAST_ACCUM':False,'ACC_TYPE':'tl.float32','BLOCK_M':16,'BLOCK_N':32,'BLOCK_K':16,'GROUP_M':8},'hint_override':None}""" expected = expected.replace("cuda", GPU_TYPE) self.assertExpectedInline( diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index 7fb6d71cd3620..39bbbf668ba70 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -254,6 +254,9 @@ class BlockDescriptorOptions: broadcasting_dims: list[bool] final_shape: Sequence[sympy.Expr] _boundary_check: Optional[list[int]] = None + # Can we safely lift the constructor + # to the top of the kernel? + can_lift: bool = False @property def shape(self) -> list[sympy.Expr]: @@ -280,6 +283,8 @@ def create( range_trees: list[IterationRangesRoot], mask_vars: OrderedSet[str], get_max_block: Callable[[str], int], + can_lift=False, + transpose_contiguous=False, ) -> BlockDescriptorOptions: """Helper to create a BlockDescriptorOptions instance""" @@ -337,8 +342,14 @@ def remove_dims(it): # Drop removable dimensions from the input. params = BlockParameters( - **{key: remove_dims(val) for key, val in dataclasses.asdict(params).items()} + **{ + key: remove_dims(val) for key, val in dataclasses.asdict(params).items() + }, ) + # TODO: Generalize to ND tensors. 
+ transpose = transpose_contiguous and params.strides[-1] != 1 + if transpose: + params = params.transpose() # Compute the final shape, adjusting for special kernel types. final_shape = [TritonSymbols.get_block_size(tree) for tree in range_trees] @@ -346,6 +357,12 @@ def remove_dims(it): assert range_trees[0].prefix == "x" final_shape.pop(0) + # Check for when BlockParams have been transposed. + order = list(reversed(range(len(params.shape)))) + if transpose: + final_shape.reverse() + order.reverse() + reduction_ndim = V.kernel.num_reduction_dims if ( not V.kernel.inside_reduction @@ -358,11 +375,12 @@ def remove_dims(it): result = cls( params=params, constant_offset=V.graph.sizevars.lookup_precomputed_size(constant_offset), - order=list(reversed(range(len(params.shape)))), + order=order, mask_vars=mask_vars, final_shape=final_shape, broadcast_shape=broadcast_shape, broadcasting_dims=broadcasting_dims, + can_lift=can_lift, ) result.compute_boundary_check(get_max_block, range_trees) return result @@ -1642,6 +1660,14 @@ def __add__(self, other: BlockParameters) -> BlockParameters: a, b = tuple(dataclasses.asdict(x) for x in (self, other)) return cls(**{key: a[key] + b[key] for key in a}) + def transpose(self) -> BlockParameters: + return BlockParameters( + self.shape[::-1], + self.block_shape[::-1], + self.strides[::-1], + self.offsets[::-1], + ) + class CooperativeReductionWorkspaceCache: """ @@ -1712,6 +1738,7 @@ class TMACompatibilityChecker: kernel: TritonKernel dtype: torch.dtype for_store: bool + force: bool def __post_init__(self): self.failed_debug_prefix = "Cannot use TMA descriptor for load / store since: " @@ -1720,6 +1747,8 @@ def __post_init__(self): def can_use_tma( self, ) -> bool: + if self.force: + return True if not ( V.graph.get_current_device_or_throw().type == "cuda" and torch.cuda.get_device_capability()[0] >= 9 @@ -1756,12 +1785,19 @@ def are_block_parameters_compatible( ) -> bool: """ Check if the block parameters are valid for TMA. + If force, we allow relying on symbolic hints equivalent + to what we check for Triton templates. """ + if self.force: + strides = [ + V.graph.sizevars.symbolic_hint(st) for st in block_params.strides + ] + else: + strides = block_params.strides + # The TMA API requires that the innermost stride is 1 # and that the outer strides are 16 byte aligned - if not V.graph.sizevars.statically_known_equals( - block_params.strides[-1], sympy.Integer(1) - ): + if not V.graph.sizevars.statically_known_equals(strides[-1], sympy.Integer(1)): log.debug( "%s TMA API requires innermost stride to be 1.", self.failed_debug_prefix, @@ -1769,7 +1805,7 @@ def are_block_parameters_compatible( return False element_size = self.dtype.itemsize - for stride in block_params.strides[:-1]: + for stride in strides[:-1]: if not V.graph.sizevars.statically_known_equals( ModularIndexing(stride * element_size, 1, sympy.Integer(16)), sympy.Integer(0), @@ -1871,6 +1907,18 @@ def are_block_parameters_compatible( return True + def can_lift(self) -> bool: + """ + Can you lift the make_tensor_descriptor + call to the top of the kernel? This requires + being certain that all of the shape, stride, + and block_shape information is handled in arguments + or top level definitions. + + Right now we assume this is always possible if you force TMA. 
+ """ + return self.force + class TritonKernel(SIMDKernel[TritonCSEVariable]): """A class to represent a triton kernel and helpers to generate @@ -1896,6 +1944,7 @@ def __init__( self.fixed_config = fixed_config super().__init__(tiling, **kwargs) self.cse = TritonCSE(self.newvar_prefix, self.suffix) + self.prologue: IndentedBuffer = IndentedBuffer() self.post_loop_combine: IndentedBuffer = IndentedBuffer() self.post_loop_store: IndentedBuffer = IndentedBuffer() self.outside_loop_vars = OrderedSet[Any]() @@ -2090,6 +2139,8 @@ def indexing( for symt in TritonSymbols.block_types if symbol_is_type(var, symt) ] + if len(prefix_matches) == 0: + pass assert len(prefix_matches) == 1, f"Ambiguous type: {var.name}" mask_vars.add(f"{prefix_matches[0]}mask") @@ -2305,16 +2356,29 @@ def match_block_expr() -> Optional[BlockDescriptorOptions]: if config.triton.use_block_ptr else TensorDescriptorOptions ) + nonlocal tma_compatibility_checker + if config.triton.use_block_ptr: + can_lift = False + transpose_contiguous = False + else: + tma_compatibility_checker = cast( + TMACompatibilityChecker, tma_compatibility_checker + ) + can_lift = tma_compatibility_checker.can_lift() + # Only try transpose if we know the output shape + # in case we need to transpose the data. + transpose_contiguous = copy_shape is not None + options = options_class.create( params=block_params, constant_offset=offset, range_trees=range_trees, mask_vars=mask_vars, get_max_block=self.max_block, + can_lift=can_lift, + transpose_contiguous=transpose_contiguous, ) - if options_class == TensorDescriptorOptions: - nonlocal tma_compatibility_checker tma_compatibility_checker = cast( TMACompatibilityChecker, tma_compatibility_checker ) @@ -2329,7 +2393,6 @@ def match_block_expr() -> Optional[BlockDescriptorOptions]: options = match_block_expr() if options is not None: return options - expand_str = None expand_shape: BlockShapeType = None index_str = self.index_to_str(index) @@ -2421,17 +2484,19 @@ def codegen_block_ptr( self.inside_reduction and self.range_trees[-1].is_loop and indexing.has_rindex() - ): + ) or indexing.can_lift: block_descriptor_id = next(self.block_ptr_id) if isinstance(indexing, BlockPtrOptions): block_descriptor = f"block_ptr{block_descriptor_id}" else: block_descriptor = f"tma_descriptor{block_descriptor_id}" - self.body.writeline( - DeferredLine( - name, f"{block_descriptor} = {indexing.format(var, roffset=False)}" - ) + line_body = DeferredLine( + name, f"{block_descriptor} = {indexing.format(var, roffset=False)}" ) + if indexing.can_lift: + self.prologue.writeline(line_body) + else: + self.body.writeline(line_body) if isinstance(indexing, BlockPtrOptions): # Store for later use. If the buffer is removed the below advancements @@ -2461,23 +2526,45 @@ def codegen_block_ptr( return block_descriptor, other def codegen_block_ptr_store_line(self, name, indexing, block_ptr, value, other=""): - # Stores require an explicit broadcast. We do this in two phases: - # 1. Broadcast the operand to the final shape of the range trees, e.g. [ZBLOCK, - # YBLOCK, XBLOCK]. This protects against implicit broadcasting from loads. - # 2. In case the block pointer / tma descriptor has different dimensionality, broadcast/reshape the - # result to the shape of the pointer. - value = f"tl.broadcast_to({value}, {indexing.final_shape})" - - # These dims no longer need broadcasting. 
- for idx, (dim, broadcast_dim) in enumerate( - zip(indexing.final_shape, indexing.broadcast_shape) - ): - if V.graph.sizevars.statically_known_equals(dim, broadcast_dim): - indexing.broadcasting_dims[idx] = False + # TMA stores may require transposing the data to ensure we are contiguous along + # the final dimension. We do this by checking the shape information on value. + # It can either + # 1. Match the final shape. In this case no broadcast/reshape + # is necessary. + # 2. Exist as the Transpose of the final shape, which means we had to transpose + # the store_descriptor relative to the accumulator indexing/value. If this + # happens we will generate a tl.trans(). + # 3. A mismatched provided shape. When this occurs we will error. + # 4. No shape is provided. This will proceed with the default explicit broadcast + # described below. + # + # To prevent unintended side effects we will gate options 1-3 behind isinstance(indexing, TensorDescriptorOptions). + if isinstance(indexing, TensorDescriptorOptions) and value.shape: + str_final_shape = tuple([symt.name for symt in indexing.final_shape]) + if value.shape[::-1] == str_final_shape: + value = f"tl.trans({value})" + elif value.shape != str_final_shape: + raise AssertionError( + "TMA store requires no broadcasting when a shape is provided" + ) + else: + # Stores require an explicit broadcast. We do this in two phases: + # 1. Broadcast the operand to the final shape of the range trees, e.g. [ZBLOCK, + # YBLOCK, XBLOCK]. This protects against implicit broadcasting from loads. + # 2. In case the block pointer / tma descriptor has different dimensionality, broadcast/reshape the + # result to the shape of the pointer. + value = f"tl.broadcast_to({value}, {indexing.final_shape})" + + # These dims no longer need broadcasting. + for idx, (dim, broadcast_dim) in enumerate( + zip(indexing.final_shape, indexing.broadcast_shape) + ): + if V.graph.sizevars.statically_known_equals(dim, broadcast_dim): + indexing.broadcasting_dims[idx] = False - value = indexing.codegen_broadcast_and_reshape( - value, indexing.final_shape, indexing.block_shape, False - ) + value = indexing.codegen_broadcast_and_reshape( + value, indexing.final_shape, indexing.block_shape, False + ) # workaround https://github.com/triton-lang/triton/issues/2814 value = f"{value}.to({triton_store_type(V.graph.get_dtype(name))})" @@ -2541,7 +2628,10 @@ def load(self, name: str, index: sympy.Expr): index, block_ptr=True, tma_compatibility_checker=self.tma_compatibility_checker_cls( - self, dtype, for_store=False + self, + dtype, + for_store=False, + force=False, ), ) has_rindex = indexing.has_rindex() @@ -2694,9 +2784,13 @@ def store( dtype = V.graph.get_dtype(name) tma_compatibility_checker = None - if mode is None: + if mode is None or mode == "tma": + force = mode == "tma" tma_compatibility_checker = self.tma_compatibility_checker_cls( - self, dtype, for_store=True + self, + dtype, + for_store=True, + force=force, ) indexing = self.indexing( index, @@ -3470,7 +3564,10 @@ def store_reduction( index, block_ptr=True, tma_compatibility_checker=self.tma_compatibility_checker_cls( - kernel=self, dtype=dtype, for_store=True + kernel=self, + dtype=dtype, + for_store=True, + force=False, ), ) self.inside_reduction = True @@ -3769,6 +3866,18 @@ def cse_multiple(line, broadcasted_values, masks, dtypes): return tuple(result_vars) + def codegen_prologue(self, code: IndentedBuffer): + """ + Generate the output from prologue. 
This should be + extracted from the subgraph, which is why this is + partitioned from codegen_body. + """ + if not self.prologue: + return + + code.splice(self.prologue) + self.prologue.clear() + def codegen_body(self): """ Concat output code from index_code, loads, compute, stores, @@ -4296,6 +4405,7 @@ def add_constexpr_arg(arg_name): self.triton_meta = triton_meta + self.codegen_prologue(self.body) self.codegen_body() for helper in self.helper_functions: diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index f143e34b5dbc6..13512b19a05fa 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -1432,6 +1432,9 @@ class triton: enable_persistent_tma_matmul = ( os.environ.get("ENABLE_PERSISTENT_TMA_MATMUL", "0") == "1" ) + # Should TMA store be enable from templates. TODO: Remove once we + # can autotune over the result. + enable_template_tma_store = os.environ.get("ENABLE_TEMPLATE_TMA_STORE", "0") == "1" # Skip L1 cache for buffers that are used only once. Disabled by default skip_l1_cache = os.environ.get("TORCHINDUCTOR_SKIP_L1", "0") == "1" diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 24c5c23218ba6..07474ed450dd3 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -340,15 +340,18 @@ ) if ki == k_tiles - 1: + # inductor generates a suffix + {%- if TMA_EXPERIMENTAL_API %} # rematerialize rm and rn to save registers rcm = rm + tl.arange(0, BLOCK_M) rcn = rn + tl.arange(0, BLOCK_N) idx_m = rcm[:, None] idx_n = rcn[None, :] mask = (idx_m < M) & (idx_n < N) - - # inductor generates a suffix {{store_output(("idx_m", "idx_n"), "acc", "mask", indent_width=12, val_shape=("BLOCK_M", "BLOCK_N"))}} + {%- else %} + {{store_output(("rm", "rn"), "acc", indent_width=12, val_shape=("BLOCK_M", "BLOCK_N"), block_indexing=True)}} + {%- endif %} acc = tl.zeros((BLOCK_M, BLOCK_N), dtype=ACC_TYPE) """, @@ -535,11 +538,21 @@ def apply_scaling( stride_b_scale_n, ) + # inductor generates a suffix + {%- if TMA_EXPERIMENTAL_API %} idx_m = offs_cm[:, None] idx_n = offs_cn[None, :] mask = (idx_m < M) & (idx_n < N) - # inductor generates a suffix {{store_output(("idx_m", "idx_n"), "accumulator", "mask", indent_width=12, val_shape=("BLOCK_M", "BLOCK_N"))}} + {%- else %} + {{store_output( + ("offs_am", "offs_bn"), + "accumulator", + indent_width=12, + val_shape=("BLOCK_M", "BLOCK_N"), + block_indexing=True, + )}} + {%- endif %} accumulator = tl.zeros((BLOCK_M, BLOCK_N), dtype=tl.float32) """ @@ -761,7 +774,7 @@ def tuned_mm(mat1, mat2, *, layout=None): if is_nonzero and use_triton_template(layout, check_max_autotune=True): templates_to_use.append(mm_template) - if use_triton_tma_template(mat1, mat2): + if use_triton_tma_template(mat1, mat2, output_layout=layout): templates_to_use.append(persistent_tma_mm_template) if use_decompose_k_choice(m, n, k): @@ -964,7 +977,7 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): if is_nonzero and use_triton_template(layout, check_max_autotune=False): templates_to_use.append(mm_template) - if use_triton_tma_template(mat1, mat2): + if use_triton_tma_template(mat1, mat2, output_layout=layout): templates_to_use.append(persistent_tma_mm_template) templates_to_use.append(addmm_contiguous_subgraph_template) @@ -1149,8 +1162,8 @@ def tuned_scaled_mm( overriders = dict(USE_FAST_ACCUM=use_fast_accum) # TODO (paulzhan): There is no template that exists for bias and TMA - # Don't run tma template currently if bias exists - if use_triton_tma_template(mat_a, mat_b) and not bias: + # 
Don't run tma template currently if bias exist + if use_triton_tma_template(mat_a, mat_b, output_layout=layout) and not bias: templates_to_use.append(scaled_mm_device_tma_template) kwarg_overrides[scaled_mm_device_tma_template.uid] = overriders diff --git a/torch/_inductor/ops_handler.py b/torch/_inductor/ops_handler.py index a52257c61480c..cccb0e2943622 100644 --- a/torch/_inductor/ops_handler.py +++ b/torch/_inductor/ops_handler.py @@ -19,7 +19,7 @@ T = TypeVar("T") -StoreMode = Optional[Literal["atomic_add"]] +StoreMode = Optional[Literal["atomic_add", "tma"]] ReductionType = Literal[ "argmax", "argmin", diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index 114b9a5a40e49..7bc04f30c6d27 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -378,6 +378,7 @@ def __init__( num_consumer_groups=0, num_buffers_warp_spec=0, use_jit=False, + tma_store=False, prefix_args=0, suffix_args=0, epilogue_fn=identity, @@ -386,12 +387,25 @@ def __init__( prologue_loads_all_inputs=False, hint_override: Optional[int] = None, ) -> None: + if tma_store: + pass numel = sympy_product(output_node.get_size()) - super().__init__( - { + if tma_store: + assert len(output_node.get_size()) == 2, ( + "TMA store only supported for 2D with templates" + ) + tiling = { + "x": output_node.get_size()[0], + "y": output_node.get_size()[1], + "r0_": sympy.S.One, + } + else: + tiling = { "x": numel, "r0_": sympy.S.One, - }, + } + super().__init__( + tiling, features=SIMDKernelFeatures([], numel), hint_override=hint_override, ) @@ -401,6 +415,7 @@ def __init__( self.defines = defines self.kernel_name = kernel_name self.use_jit = use_jit + self.tma_store = tma_store self.num_stages = num_stages self.num_warps = num_warps self.num_consumer_groups = num_consumer_groups @@ -469,6 +484,12 @@ def __init__( # Extra functions to be exposed during partial template rendering. self.extra_template_env_fns: list[Callable[..., Any]] = [] + # Tracking for intermediate variables + self.tmp_var_ctr = itertools.count() + + def _gen_tmp_var(self) -> str: + return f"_tmp_var{next(self.tmp_var_ctr)}" + def input_dependent_preserved_state(self) -> str: # Not adding self.args.output_buffers on purpose. But we do not need to reproduce it on a cache hit. # (never accessed). @@ -718,11 +739,12 @@ def hook(): with code.indent(): code.splice(self.defines) code.splice(renames.getvalue()) + self.codegen_prologue(code) return code.getvalue() return self._register_hook("", hook) - def size(self, name: str, index: int): + def size(self, name: Optional[str], index: int): """ Hook called from template code to get the size of an arg. Will add needed args to pass it in if it is dynamic. @@ -1016,6 +1038,70 @@ def hook(): return self._register_hook(hook_key, hook) + def _generate_index_from_tma_index( + self, + output_name: str, + offset_name: str, + tma_index: sympy.Symbol, + block_size: str, + dim: int, + num_dims: int, + block_name: Optional[str] = None, + ) -> list[str]: + """ + Generate the logic to compute the regular tl.load index from the provided + tma index. This is used to ensure variables can support fusions. + + Args: + output_name (str): The output variable name. + offset_name (str): The name used for the intermediate offset. + tma_index (sympy.Symbol): The symbol used for the original TMA index. + block_size (str): The block size of the index. + dim (int): Which dimension to project the index in. + num_dims (int): The total number of dimensions in the output. 
+ block_name (Optional[str]): The name of the block variable. If not passed + in then we aren't reusing standard symbol names. + + Returns: + list[str]: The lines used to generate the index. + + """ + if block_name: + # Generate the expected names for the structure: + # XBLOCK/YBLOCK and xoffset/yoffset. We append XBLOCK/YBLOCK + # to the top of the kernel so we can safely extract the tensor + # descriptor construction to the top of the kernel. + self.defines += f"{block_name}: tl.constexpr = {block_size}\n" + else: + block_name = block_size + line0 = f"{offset_name} = {texpr(tma_index)}" + expr = f"({offset_name} + tl.arange(0, {block_name}))" + prefix_none = "".join(["None, "] * dim) + suffix_none = ", ".join(["None"] * (num_dims - (dim + 1))) + line1 = f"{output_name} = {expr}[{prefix_none}:, {suffix_none}]" + return [line0, line1] + + def _generated_mask_for_tma( + self, + index_name: str, + shape_val: str, + output_name: str, + ) -> str: + """ + Generate the mask logic to feed to fusions for mask. The expectation + is that if we have X/Y there will be a variable named xmask and ymask. + + Args: + index_name (str): The index used in the mask. Should be one of + xindex or yindex. + shape_val (str): The expression for the upper bound shape. + output_name (str): The expression used for the output. + + Returns: + str: The mask generation line. + """ + return f"{output_name} = {index_name} < {shape_val}" + def store_output( self, indices: Union[list[Any], tuple[Any]], @@ -1023,6 +1109,7 @@ def store_output( mask: Optional[str] = None, indent_width: int = 4, val_shape: Optional[tuple[str]] = None, + block_indexing: bool = False, ): """Stores the final output and appends any epilogue fusions if the buffer hasn't been optimized away. @@ -1034,11 +1121,14 @@ def store_output( will be applied to the store. indent_width (int): The number of spaces to use for indentation. This is used when the call to store_output is indented in the kernel definition. + block_indexing (bool): Are the input indices presented as offsets for creating the block (e.g. + inputs to TMA) or are they tensors that should be passed in directly. 
""" with self.create_subgraph_body(""): assert isinstance(indices, (list, tuple)) assert isinstance(val, str) assert isinstance(mask, (str, type(None))) + assert isinstance(block_indexing, bool) assert self.template_mask is None indices = list(map(OpOverrides.paren, indices)) index_symbols = [sympy.Symbol(x, integer=True) for x in indices] @@ -1047,27 +1137,136 @@ def store_output( ] assert len(indices) == len(lengths) - # glue to make generated code use same indexing from template - for name, range_tree_entry in zip( - indices, self.range_trees[0].construct_entries(lengths) - ): - range_tree_entry.set_name(name) - contiguous_index = sympy_dot( - ir.FlexibleLayout.contiguous_strides(lengths), index_symbols - ) - contiguous_index = self.rename_indexing(contiguous_index) - self.body.writeline("xindex = " + texpr(contiguous_index)) - self.range_trees[0].lookup(sympy.S.One, sympy_product(lengths)).set_name( - "xindex" - ) - self.template_mask = mask - self.template_out_shape = val_shape if val_shape else val - self.template_indices = indices - output_index = self.output_node.get_layout().make_indexer()(index_symbols) - output_index = self.rename_indexing(output_index) - if output_index == contiguous_index: - output_index = sympy.Symbol("xindex", integer=True) + output_layout = self.output_node.get_layout() + self.template_out = val + if block_indexing: + assert val_shape, "Blocking indexing requires passing in val_shape" + assert len(val_shape) == 2, ( + "Blocking indexing only supports 2D data at this time" + ) + assert not mask, "Mask is not supported with blocking indexing" + intermediate_lines: list[str] = [] + epilogue_index_symbols: list[sympy.Symbol] = [] + if self.tma_store: + # Generate the expected indexing symbols. + # Note: TMA indices are expected to be in the + # format (x, y), but the range_tree is always + # (yindex, xindex). + index_order = [1, 0] + val_shape_copy = list(val_shape) + for i, range_tree in zip(index_order, self.range_trees[:-1]): + name = range_tree.name + symbol = range_tree.symbol() + epilogue_index_symbols.append(symbol) + lookup_output = range_tree.lookup(sympy.S.One, lengths[i]) + old_name = lookup_output.symbol() + lookup_output.set_name(name) + # Update var_list and var_range + range_tree.var_list[range_tree.var_list.index(old_name)] = ( + symbol + ) + range_val = range_tree.var_ranges[old_name] + del range_tree.var_ranges[old_name] + range_tree.var_ranges[symbol] = range_val + intermediate_lines.extend( + self._generate_index_from_tma_index( + name, + "xoffset" if name == "xindex" else "yoffset", + index_symbols[i], + val_shape[i], + i, + len(index_order), + block_name=range_tree.symt.name, + ) + ) + # Generate the xmask and ymask + intermediate_lines.append( + self._generated_mask_for_tma( + name, + self.size(None, i), + "xmask" if name == "xindex" else "ymask", + ) + ) + # Update the val_shape information to use consistent naming + # after the remapping. 
+ val_shape_copy[i] = range_tree.symt.name + # Reverse the index symbols because TMA is indexed + # as (x, y) whereas the variables will naturally be indexed + # as (y, x) + epilogue_index_symbols.reverse() + val_shape = tuple(val_shape_copy) + else: + mask_vars: list[str] = [] + for i, (index, shape) in enumerate(zip(index_symbols, val_shape)): + index_name = self._gen_tmp_var() + offset_name = self._gen_tmp_var() + intermediate_lines.extend( + self._generate_index_from_tma_index( + index_name, + offset_name, + index, + shape, + i, + len(index_symbols), + ) + ) + epilogue_index_symbols.append( + sympy.Symbol(index_name, integer=True) + ) + mask_name = self._gen_tmp_var() + intermediate_lines.append( + self._generated_mask_for_tma( + index_name, + self.size(None, i), + mask_name, + ) + ) + mask_vars.append(mask_name) + final_mask_var = self._gen_tmp_var() + final_mask_rhs = " & ".join( + f"{mask_name}" for mask_name in mask_vars + ) + intermediate_lines.append(f"{final_mask_var} = {final_mask_rhs}") + self.template_mask = final_mask_var + index_symbols = epilogue_index_symbols + contiguous_index = sympy_dot(output_layout.stride, index_symbols) + if not self.tma_store: + # Convert to just use xindex. + contiguous_index = self.rename_indexing(contiguous_index) + intermediate_lines.append(f"xindex = {texpr(contiguous_index)}") + self.range_trees[0].lookup( + sympy.S.One, sympy_product(lengths) + ).set_name("xindex") + index_symbols = epilogue_index_symbols + output_index = contiguous_index + # Write out the intermediate lines + for line in intermediate_lines: + self.body.writeline(line) + else: + assert not self.tma_store, "TMA store requires block indexing" + # glue to make generated code use same indexing from template + for name, range_tree_entry in zip( + indices, self.range_trees[0].construct_entries(lengths) + ): + range_tree_entry.set_name(name) + contiguous_index = sympy_dot( + ir.FlexibleLayout.contiguous_strides(lengths), index_symbols + ) + contiguous_index = self.rename_indexing(contiguous_index) + self.body.writeline("xindex = " + texpr(contiguous_index)) + self.range_trees[0].lookup( + sympy.S.One, sympy_product(lengths) + ).set_name("xindex") + self.template_mask = mask + self.template_indices = indices + output_index = self.output_node.get_layout().make_indexer()( + index_symbols + ) + output_index = self.rename_indexing(output_index) + if output_index == contiguous_index: + output_index = sympy.Symbol("xindex", integer=True) + self.template_out_shape = val_shape if val_shape else val acc_dtype = ( triton_type_to_torch(self.meta["ACC_TYPE"]) if "ACC_TYPE" in self.meta @@ -1095,6 +1294,7 @@ def store_output( self.output_node.get_name(), output_index, self.epilogue_fn(*epilogue_args), + mode="tma" if self.tma_store else None, ) self.codegen_body() @@ -1330,6 +1530,7 @@ def make_key( suffix_args: int, epilogue_fn: Optional[Callable[..., Any]], epilogue_fn_hash: Optional[str], + tma_store: bool, subgraphs: Optional[list[ir.Buffer]], # has to be none to cache workspace_arg: Optional[WorkspaceArg], # has to be none to cache layout: ir.Layout, @@ -1386,6 +1587,7 @@ def has_flexible_layout() -> bool: "num_consumer_groups": num_consumer_groups, "num_buffers_warp_spec": num_buffers_warp_spec, "epilogue_fn_hash": epilogue_fn_hash, + "tma_store": tma_store, "kwargs": kwargs, "hint_override": hint_override, } @@ -1500,6 +1702,7 @@ def generate_and_load( kwargs: dict[str, Any], generate_with_caching, hint_override: Optional[int] = None, + tma_store: bool = False, ) -> 
Optional[GenerateAndLoadResult]: """Generate the python code and load it into the current process""" caching_enabled = ( @@ -1518,6 +1721,7 @@ def generate_and_load( suffix_args, epilogue_fn, epilogue_fn_hash, + tma_store, subgraphs, workspace_arg, layout, @@ -1577,6 +1781,7 @@ def make_kernel(): workspace_arg=workspace_arg, use_jit=False, hint_override=hint_override, + tma_store=tma_store, **kernel_options, ) @@ -1697,6 +1902,7 @@ def generate( # type: ignore[override] workspace_arg: Optional[WorkspaceArg] = None, generate_with_caching=False, hint_override: Optional[int] = None, + tma_store: bool = False, **kwargs, ): """This function generates a TritonTemplateCaller @@ -1742,6 +1948,7 @@ def generate( # type: ignore[override] kwargs, generate_with_caching and self._cache_codegen_enabled_for_template, hint_override=hint_override, + tma_store=tma_store, ) # May happen as result of dev by 0. @@ -1795,6 +2002,7 @@ def make_kernel_render(out_node, hint_override: Optional[int] = None): workspace_arg=workspace_arg, use_jit=False, hint_override=hint_override, + tma_store=tma_store, **options, ) render = functools.partial( diff --git a/torch/_inductor/template_heuristics/triton.py b/torch/_inductor/template_heuristics/triton.py index 7fb1541d172ae..731baa650e37e 100644 --- a/torch/_inductor/template_heuristics/triton.py +++ b/torch/_inductor/template_heuristics/triton.py @@ -1642,6 +1642,7 @@ def _get_template_configs_impl( "NUM_SMS": get_num_sms(), "TMA_SIZE": TMA_DESCRIPTOR_SIZE, "TMA_EXPERIMENTAL_API": not has_triton_stable_tma_api(), + "tma_store": config.triton.enable_template_tma_store, } # Get base template configs from superclass for template_kwargs in super()._get_template_configs_impl( diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index b0619e6592ff1..b54c9dfe4965a 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -1664,7 +1664,9 @@ def use_triton_template( ) -def can_use_tma(*matrices: IRNode, add_guards: bool = False) -> bool: +def can_use_tma( + *matrices: IRNode, output_layout: Optional[Layout] = None, add_guards: bool = False +) -> bool: """ Return True iff *all* supplied tensors satisfy the CUDA-12.9 TMA constraints that Triton relies on today. 
@@ -1686,11 +1688,37 @@ def can_use_tma(*matrices: IRNode, add_guards: bool = False) -> bool: def _aligned(expr_bytes: Union[int, sympy.Expr]) -> bool: return V.graph.sizevars.statically_known_multiple_of(expr_bytes, TMA_ALIGNMENT) - def _is_tma_compatible_default(x: IRNode) -> bool: - sizes = x.get_size() - strides = x.get_stride() + def _is_tma_compatible_layout(layout: Optional[Layout]) -> bool: + if layout is None: + return True + sizes = layout.size + strides = layout.stride + dtype = layout.dtype + + # Verify the output is 16-byte aligned + if not _aligned(layout.offset): + return False + + return _is_tma_compatible(sizes, strides, dtype, allow_float32=True) + + def _is_tma_compatible_matrix(m: IRNode) -> bool: + sizes = m.get_size() + strides = m.get_stride() + dtype = m.get_dtype() + + # Base pointer 16-byte aligned + if m.get_name() in V.graph.unaligned_buffers: + return False + + return _is_tma_compatible(sizes, strides, dtype, allow_float32=False) + + def _is_tma_compatible( + sizes: Sequence[sympy.Expr], + strides: Sequence[_IntLike], + dtype: torch.dtype, + allow_float32: bool, + ) -> bool: rank = len(sizes) - dtype = x.get_dtype() itemsize = dtype.itemsize # 2 ≤ rank ≤ 5 @@ -1698,11 +1726,9 @@ def _is_tma_compatible_default(x: IRNode) -> bool: return False # dtype ∈ {FP16, BF16, FP8-E4M3FN} - if dtype not in (torch.float16, torch.bfloat16, torch.float8_e4m3fn): - return False - - # Base pointer 16-byte aligned - if x.get_name() in V.graph.unaligned_buffers: + if dtype not in (torch.float16, torch.bfloat16, torch.float8_e4m3fn) and ( + not allow_float32 or dtype != torch.float32 + ): return False if add_guards: @@ -1746,31 +1772,20 @@ def _is_tma_compatible_default(x: IRNode) -> bool: return True - def _is_tma_compatible_xpu(x: IRNode) -> bool: - strides = x.get_stride() - strides_i = [V.graph.sizevars.symbolic_hint(st) for st in strides] - # Find the single contiguous (“inner”) dim - inner = [ - i - for i, st in enumerate(strides_i) - if V.graph.sizevars.statically_known_equals(st, 1) - ] - if len(inner) != 1: - return False - return True - - return has_triton_tma_device() and all( - _is_tma_compatible_default(m) - if (m_device := m.get_device()) is None or m_device.type != "xpu" - else _is_tma_compatible_xpu(m) - for m in matrices + return ( + has_triton_tma_device() + and all(_is_tma_compatible_matrix(m) for m in matrices) + and _is_tma_compatible_layout(output_layout) ) -def use_triton_tma_template(*matrices: IRNode, add_guards: bool = False) -> bool: +def use_triton_tma_template( + *matrices: IRNode, output_layout: Layout, add_guards: bool = False +) -> bool: + layout = output_layout if config.triton.enable_template_tma_store else None return ( all(len(m.get_size()) == 2 for m in matrices) - and can_use_tma(*matrices, add_guards=add_guards) + and can_use_tma(*matrices, output_layout=layout, add_guards=add_guards) and config.triton.enable_persistent_tma_matmul ) From 84186c39ed0330c155ec1e1bc5c8c9d46858ea9b Mon Sep 17 00:00:00 2001 From: Thien Tran Date: Sun, 14 Sep 2025 06:17:33 +0000 Subject: [PATCH 227/693] [NVRTC] Enable compiling templated kernels (#162875) Per NVRTC doc - https://docs.nvidia.com/cuda/nvrtc/index.html#accessing-lowered-names, we can compile a templated kernel (e.g. `kernel`) with the following steps NVRTC side - (new) `nvrtcAddNameExpression` -> C++ template e.g. `f` - `nvrtcCompileProgram` - (new) `nvrtcGetLoweredName` -> get mangled name. 
need to do a copy since later this string is freed after NVRTC program is destroyed - `nvrtcDestroyProgram` CUDA side - use mangled name instead of normal name -> profit - `extern "C"` is not even needed Pull Request resolved: https://github.com/pytorch/pytorch/pull/162875 Approved by: https://github.com/msaroufim --- test/test_cuda.py | 61 ++++++++++++++++++++++++++++++++++++++++++ torch/cuda/__init__.py | 10 +++---- torch/cuda/_utils.py | 28 ++++++++++++++----- 3 files changed, 86 insertions(+), 13 deletions(-) diff --git a/test/test_cuda.py b/test/test_cuda.py index b5db4bac59f9a..6a4155825c7c6 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -7160,6 +7160,67 @@ def test_compile_kernel_cuda_headers(self): expected = torch.full((n,), test_value, device="cuda", dtype=torch.float16) torch.testing.assert_close(output, expected, rtol=1e-3, atol=1e-3) + @unittest.skipIf(not TEST_CUDA, "No CUDA") + def test_compile_kernel_template(self): + kernel_source = """ + template + __global__ void add_tensors(const T* a, const T* b, T* c, int n) { + int i = threadIdx.x + blockIdx.x * blockDim.x; + if (i < n) + c[i] = a[i] + b[i]; + } + """ + + # Compile the kernel + from torch.cuda import _compile_kernel + + add_kernel_float = _compile_kernel(kernel_source, "add_tensors") + + # Prepare data + N = 1024 + a = torch.rand(N, device="cuda") + b = torch.rand(N, device="cuda") + c = torch.empty_like(a) + + # Calculate grid and block dimensions + threads_per_block = 256 + blocks_per_grid = (N + threads_per_block - 1) // threads_per_block + + # Launch kernel + add_kernel_float( + grid=(blocks_per_grid, 1, 1), + block=(threads_per_block, 1, 1), + args=[a, b, c, N], + ) + + # Verify results + expected = a + b + self.assertEqual(c, expected) + + # do again with different dtype + add_kernel_int = _compile_kernel(kernel_source, "add_tensors") + + # Prepare data + N = 1024 + a = torch.randint(-1000, 1000, size=(N,), dtype=torch.int, device="cuda") + b = torch.randint(-1000, 1000, size=(N,), dtype=torch.int, device="cuda") + c = torch.empty_like(a) + + # Calculate grid and block dimensions + threads_per_block = 256 + blocks_per_grid = (N + threads_per_block - 1) // threads_per_block + + # Launch kernel + add_kernel_int( + grid=(blocks_per_grid, 1, 1), + block=(threads_per_block, 1, 1), + args=[a, b, c, N], + ) + + # Verify results + expected = a + b + self.assertEqual(c, expected) + @unittest.skipIf(not TEST_CUDA, "CUDA not available, skipping tests") class TestCudaDeviceParametrized(TestCase): diff --git a/torch/cuda/__init__.py b/torch/cuda/__init__.py index 01bc4d73a4595..70a7269d4404e 100644 --- a/torch/cuda/__init__.py +++ b/torch/cuda/__init__.py @@ -1772,12 +1772,10 @@ def _compile_kernel( >>> c = torch.empty_like(a) >>> add_kernel(grid=(4, 1, 1), block=(256, 1, 1), args=[a, b, c, a.numel()]) """ - import ctypes - from torch.cuda._utils import _cuda_load_module, _nvrtc_compile # Compile the kernel to PTX - ptx = _nvrtc_compile( + ptx, mangled_name = _nvrtc_compile( kernel_source, kernel_name, compute_capability, @@ -1787,14 +1785,14 @@ def _compile_kernel( ) # Load the module and get the kernel - result = _cuda_load_module(ptx, [kernel_name]) + result = _cuda_load_module(ptx, [mangled_name]) if isinstance(result, dict): - return result[kernel_name] + return result[mangled_name] else: # This branch shouldn't be executed if kernel_names is provided, # but MyPy needs this to understand type narrowing - return getattr(result, kernel_name) + return getattr(result, mangled_name) from . 
import amp, jiterator, nvtx, profiler, sparse, tunable diff --git a/torch/cuda/_utils.py b/torch/cuda/_utils.py index dc269fa629aaf..c09ccb70ab290 100644 --- a/torch/cuda/_utils.py +++ b/torch/cuda/_utils.py @@ -65,6 +65,8 @@ def _get_hiprtc_library() -> ctypes.CDLL: lib.nvrtcGetPTX = lib.hiprtcGetCode # type: ignore[attr-defined] lib.nvrtcGetProgramLogSize = lib.hiprtcGetProgramLogSize # type: ignore[attr-defined] lib.nvrtcGetProgramLog = lib.hiprtcGetProgramLog # type: ignore[attr-defined] + lib.nvrtcAddNameExpression = lib.hiprtcAddNameExpression # type: ignore[attr-defined] + lib.nvrtcGetLoweredName = lib.hiprtcGetLoweredName # type: ignore[attr-defined] return lib @@ -115,7 +117,7 @@ def _nvrtc_compile( header_code: str = "", cuda_include_dirs: Optional[list] = None, nvcc_options: Optional[list] = None, -) -> bytes: +) -> tuple[bytes, str]: """ Compiles a CUDA kernel using NVRTC and returns the PTX code. @@ -129,7 +131,7 @@ def _nvrtc_compile( nvcc_options (list, None): Additional options to pass to NVRTC Returns: - str: The compiled PTX code + Tuple[bytes, str]: The compiled PTX code and mangled kernel name """ # Ensure CUDA is initialized import torch.cuda @@ -152,10 +154,6 @@ def check_nvrtc(result: int) -> None: ) raise RuntimeError(f"CUDA error: {error_message}") - # Add 'extern "C"' if not already present to ensure C linkage - if not kernel_source.strip().startswith('extern "C"'): - kernel_source = f'extern "C" {kernel_source}' - # Combine header code and kernel source if header_code: full_source = header_code + "\n" + kernel_source @@ -217,6 +215,10 @@ def check_nvrtc(result: int) -> None: ) ) + # Add kernel name, which can be a template expression + c_kernel_name = kernel_name.encode("utf-8") + check_nvrtc(libnvrtc.nvrtcAddNameExpression(prog, c_kernel_name)) + # Compile program res = libnvrtc.nvrtcCompileProgram(prog, num_options, options_array) @@ -234,12 +236,24 @@ def check_nvrtc(result: int) -> None: check_nvrtc(libnvrtc.nvrtcGetPTXSize(prog, ctypes.byref(ptx_size))) ptx = ctypes.create_string_buffer(ptx_size.value) check_nvrtc(libnvrtc.nvrtcGetPTX(prog, ptx)) + + # Get mangled name + c_mangled_name = ctypes.c_char_p() + check_nvrtc( + libnvrtc.nvrtcGetLoweredName(prog, c_kernel_name, ctypes.byref(c_mangled_name)) + ) + if c_mangled_name.value is not None: + mangled_name = c_mangled_name.value.decode() # make a copy + else: + mangled_name = "" + libnvrtc.nvrtcDestroyProgram(ctypes.byref(prog)) # For HIP, hipRTC generates raw CO binaries instead of PTX, # and for some reason, ".value" causes the string to be truncated, # likely due to the presence of '\0' in the string. So we use .raw instead. 
- return ptx.raw if torch.version.hip else ptx.value + ptx_bytes = ptx.raw if torch.version.hip else ptx.value + return ptx_bytes, mangled_name class _CudaModule: From 972140b7e9af066dcc67e4dad2213220b61ec719 Mon Sep 17 00:00:00 2001 From: angelayi Date: Sun, 14 Sep 2025 07:41:06 +0000 Subject: [PATCH 228/693] [benchmark] Add HF LLM benchmarks (#156967) Results in https://docs.google.com/spreadsheets/d/1xXOPg9JjEmPx0zc5QBNdyXQq8-K2_r4ybHaiS-q7pZ0/edit?gid=88695043#gid=88695043 Pull Request resolved: https://github.com/pytorch/pytorch/pull/156967 Approved by: https://github.com/huydhn Co-authored-by: Huy Do --- .github/workflows/inductor-periodic.yml | 4 +- benchmarks/dynamo/check_accuracy.py | 6 ++ benchmarks/dynamo/check_graph_breaks.py | 6 ++ .../aot_eager_huggingface_inference.csv | 20 ++++ .../aot_eager_huggingface_training.csv | 20 ++++ .../aot_inductor_huggingface_inference.csv | 20 ++++ ...tor_amp_freezing_huggingface_inference.csv | 20 ++++ ...nductor_freezing_huggingface_inference.csv | 20 ++++ .../cpu_inductor_huggingface_inference.csv | 20 ++++ ...ynamic_aot_eager_huggingface_inference.csv | 20 ++++ ...dynamic_aot_eager_huggingface_training.csv | 20 ++++ ...mic_cpu_inductor_huggingface_inference.csv | 20 ++++ ...dynamic_inductor_huggingface_inference.csv | 20 ++++ .../dynamic_inductor_huggingface_training.csv | 20 ++++ .../dynamo_eager_huggingface_inference.csv | 20 ++++ .../inductor_huggingface_inference.csv | 20 ++++ .../inductor_huggingface_training.csv | 20 ++++ benchmarks/dynamo/common.py | 94 +++++++++++----- benchmarks/dynamo/huggingface.py | 69 ++++++++---- benchmarks/dynamo/huggingface.yaml | 14 ++- benchmarks/dynamo/huggingface_llm_models.py | 102 ++++++++++++++++++ benchmarks/dynamo/huggingface_models_list.txt | 5 + 22 files changed, 533 insertions(+), 47 deletions(-) create mode 100644 benchmarks/dynamo/huggingface_llm_models.py diff --git a/.github/workflows/inductor-periodic.yml b/.github/workflows/inductor-periodic.yml index e2395087326a2..454cd166c90bb 100644 --- a/.github/workflows/inductor-periodic.yml +++ b/.github/workflows/inductor-periodic.yml @@ -39,7 +39,7 @@ jobs: runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" build-environment: linux-jammy-cuda12.8-py3.10-gcc9-sm86 docker-image-name: ci-image:pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks - cuda-arch-list: '8.6' + cuda-arch-list: '8.0;8.6' test-matrix: | { include: [ { config: "dynamo_eager_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, @@ -62,7 +62,7 @@ jobs: { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, + { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.aws.a100" }, { config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, diff --git a/benchmarks/dynamo/check_accuracy.py b/benchmarks/dynamo/check_accuracy.py index 678cee5f752c3..4bd518790b3c6 100644 --- a/benchmarks/dynamo/check_accuracy.py +++ 
b/benchmarks/dynamo/check_accuracy.py @@ -72,6 +72,12 @@ def check_accuracy(actual_csv, expected_csv, expected_filename): "timm_vovnet", "torchrec_dlrm", "vgg16", + # LLM + "meta-llama/Llama-3.2-1B", + "google/gemma-2-2b", + "google/gemma-3-4b-it", + "openai/whisper-tiny", + "Qwen/Qwen3-0.6B", } ) diff --git a/benchmarks/dynamo/check_graph_breaks.py b/benchmarks/dynamo/check_graph_breaks.py index 57814dacd00b3..87ef46b68324d 100644 --- a/benchmarks/dynamo/check_graph_breaks.py +++ b/benchmarks/dynamo/check_graph_breaks.py @@ -55,6 +55,12 @@ def check_graph_breaks(actual_csv, expected_csv, expected_filename): "timm_nfnet", "torchrec_dlrm", "vgg16", + # LLM + "meta-llama/Llama-3.2-1B", + "google/gemma-2-2b", + "google/gemma-3-4b-it", + "openai/whisper-tiny", + "Qwen/Qwen3-0.6B", } ) diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_inference.csv index 0f088e7892d8f..b759310805957 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_training.csv index f65909f3a24ea..37e1b792b3dc3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_eager_huggingface_training.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,5 YituTechConvBert,pass,5 + + + +meta-llama/Llama-3.2-1B,eager_fail_to_run,0 + + + +google/gemma-2-2b,eager_fail_to_run,0 + + + +google/gemma-3-4b-it,eager_fail_to_run,0 + + + +openai/whisper-tiny,eager_fail_to_run,0 + + + +Qwen/Qwen3-0.6B,eager_fail_to_run,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_huggingface_inference.csv index 169a42ff7cd41..2283da42b1455 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/aot_inductor_huggingface_inference.csv @@ -167,3 +167,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,fail_accuracy,0 + + + +google/gemma-2-2b,fail_accuracy,0 + + + +google/gemma-3-4b-it,fail_accuracy,0 + + + +openai/whisper-tiny,fail_to_run,0 + + + +Qwen/Qwen3-0.6B,fail_accuracy,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_huggingface_inference.csv index 0f088e7892d8f..386f9099731c7 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass_due_to_skip,0 + + + +google/gemma-2-2b,pass_due_to_skip,0 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass_due_to_skip,0 + + + +Qwen/Qwen3-0.6B,pass_due_to_skip,0 diff --git 
a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_huggingface_inference.csv index 0f088e7892d8f..386f9099731c7 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass_due_to_skip,0 + + + +google/gemma-2-2b,pass_due_to_skip,0 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass_due_to_skip,0 + + + +Qwen/Qwen3-0.6B,pass_due_to_skip,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv index 0f088e7892d8f..386f9099731c7 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass_due_to_skip,0 + + + +google/gemma-2-2b,pass_due_to_skip,0 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass_due_to_skip,0 + + + +Qwen/Qwen3-0.6B,pass_due_to_skip,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_inference.csv index 0f088e7892d8f..b759310805957 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_training.csv index f65909f3a24ea..37e1b792b3dc3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_huggingface_training.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,5 YituTechConvBert,pass,5 + + + +meta-llama/Llama-3.2-1B,eager_fail_to_run,0 + + + +google/gemma-2-2b,eager_fail_to_run,0 + + + +google/gemma-3-4b-it,eager_fail_to_run,0 + + + +openai/whisper-tiny,eager_fail_to_run,0 + + + +Qwen/Qwen3-0.6B,eager_fail_to_run,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv index 0f088e7892d8f..b2595458b1322 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,0 + + + +google/gemma-2-2b,pass,0 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,0 + + + +Qwen/Qwen3-0.6B,pass,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_inference.csv 
b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_inference.csv index 0f088e7892d8f..b5e1a0989e74a 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_training.csv index f65909f3a24ea..37e1b792b3dc3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_huggingface_training.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,5 YituTechConvBert,pass,5 + + + +meta-llama/Llama-3.2-1B,eager_fail_to_run,0 + + + +google/gemma-2-2b,eager_fail_to_run,0 + + + +google/gemma-3-4b-it,eager_fail_to_run,0 + + + +openai/whisper-tiny,eager_fail_to_run,0 + + + +Qwen/Qwen3-0.6B,eager_fail_to_run,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_huggingface_inference.csv index 0f088e7892d8f..b759310805957 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv index 0f088e7892d8f..b759310805957 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv index f65909f3a24ea..37e1b792b3dc3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,5 YituTechConvBert,pass,5 + + + +meta-llama/Llama-3.2-1B,eager_fail_to_run,0 + + + +google/gemma-2-2b,eager_fail_to_run,0 + + + +google/gemma-3-4b-it,eager_fail_to_run,0 + + + +openai/whisper-tiny,eager_fail_to_run,0 + + + +Qwen/Qwen3-0.6B,eager_fail_to_run,0 diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py index 2901009f7c4d1..922fc977eb198 100644 --- a/benchmarks/dynamo/common.py +++ b/benchmarks/dynamo/common.py @@ -733,7 +733,7 @@ def vary_batch(t: torch.Tensor, new_batch_size) -> torch.Tensor: time_total = 0 # Dont collect outputs to correctly measure timing - for _ in range(times): + for i in range(times): # If batch_size is 1, it 
too often collides with other non batch size # dimensions resulting in errors. if batch_size and batch_size > 1: @@ -1106,7 +1106,13 @@ def maybe_mark_profile(*args, **kwargs): elif args.torchscript_jit_trace: frozen_model_iter_fn = torchscript_jit_trace(model, example_inputs) else: - frozen_model_iter_fn = torch._dynamo.run(model_iter_fn) + if kwargs["hf_llm"]: + # If it's an llm, we want to optimize model.forward, and use + # the generate function + model.forward = torch._dynamo.run(model) + frozen_model_iter_fn = model_iter_fn + else: + frozen_model_iter_fn = torch._dynamo.run(model_iter_fn) for rep in trange(args.repeat, desc="running benchmark"): inputs = ( @@ -1120,7 +1126,10 @@ def maybe_mark_profile(*args, **kwargs): maybe_mark_step(args) # interleave the runs to handle frequency scaling and load changes - with maybe_mark_profile(p=p, mark="expected"): + with ( + maybe_mark_profile(p=p, mark="expected"), + torch.compiler.set_stance("force_eager"), + ): timings[rep, 0], expected_output = timed( model, model_iter_fn, @@ -2233,11 +2242,12 @@ def record_status(accuracy_status, dynamo_start_stats): reset_rng_state() model_copy = None try: - model_copy = self.deepcopy_and_maybe_parallelize(model) - self.init_optimizer(name, current_device, model_copy.parameters()) - correct_result = self.run_n_iterations( - model_copy, clone_inputs(example_inputs), self.model_iter_fn - ) + with torch.compiler.set_stance("force_eager"): + model_copy = self.deepcopy_and_maybe_parallelize(model) + self.init_optimizer(name, current_device, model_copy.parameters()) + correct_result = self.run_n_iterations( + model_copy, clone_inputs(example_inputs), self.model_iter_fn + ) except Exception as e: accuracy_status = ( "eager_1st_run_OOM" @@ -2254,11 +2264,12 @@ def record_status(accuracy_status, dynamo_start_stats): reset_rng_state() model_copy = None try: - model_copy = self.deepcopy_and_maybe_parallelize(model) - self.init_optimizer(name, current_device, model_copy.parameters()) - correct_rerun_result = self.run_n_iterations( - model_copy, clone_inputs(example_inputs), self.model_iter_fn - ) + with torch.compiler.set_stance("force_eager"): + model_copy = self.deepcopy_and_maybe_parallelize(model) + self.init_optimizer(name, current_device, model_copy.parameters()) + correct_rerun_result = self.run_n_iterations( + model_copy, clone_inputs(example_inputs), self.model_iter_fn + ) except Exception as e: accuracy_status = ( "eager_2nd_run_OOM" @@ -2542,7 +2553,11 @@ def warmup(fn, model, example_inputs, mode, niters=10): ) baseline_timings = experiment( - model, example_inputs, mark="expected", **experiment_kwargs + self.model_iter_fn, + model, + example_inputs, + mark="expected", + **experiment_kwargs, ) if self.args.export_aot_inductor: @@ -2610,7 +2625,11 @@ def warmup(fn, model, example_inputs, mode, niters=10): ) backend_timings = experiment( - model, example_inputs, mark="expected", **experiment_kwargs + self.model_iter_fn, + model, + example_inputs, + mark="expected", + **experiment_kwargs, ) timings = np.stack((baseline_timings, backend_timings), axis=1) result_summary = latency_experiment_summary( @@ -2629,9 +2648,17 @@ def run_performance_test( tag=None, batch_size=None, ): + niters = 5 + if getattr(self, "hf_llm", False): + # If we're benchmarking an llm, we want to use the generate function + self.model_iter_fn = self.generate + niters = 1 + if self.args.xla: with self.pick_grad(name, self.args.training): - return experiment(*self.maybe_cast(model, example_inputs)) + return experiment( + 
self.model_iter_fn, *self.maybe_cast(model, example_inputs) + ) def warmup(fn, model, example_inputs, mode, niters=5): gc.collect() @@ -2696,17 +2723,22 @@ def warmup(fn, model, example_inputs, mode, niters=5): with maybe_snapshot_memory( self.args.snapshot_memory, f"eager_{self.args.only}" ): - eager_latency, eager_peak_mem, _ = warmup( - self.model_iter_fn, copy.deepcopy(model), example_inputs, "eager" - ) - if self.args.use_warm_peak_memory: - _, eager_peak_mem, _ = warmup( + with torch.compiler.set_stance("force_eager"): + eager_latency, eager_peak_mem, _ = warmup( self.model_iter_fn, copy.deepcopy(model), example_inputs, "eager", - niters=1, + niters=niters, ) + if self.args.use_warm_peak_memory: + _, eager_peak_mem, _ = warmup( + self.model_iter_fn, + copy.deepcopy(model), + example_inputs, + "eager", + niters=1, + ) if ( self.args.export_aot_inductor @@ -2715,7 +2747,13 @@ def warmup(fn, model, example_inputs, mode, niters=5): ): optimized_model_iter_fn = optimize_ctx else: - optimized_model_iter_fn = optimize_ctx(self.model_iter_fn) + if getattr(self, "hf_llm", False): + # If it's an llm, we want to optimize model.forward, and use + # the generate function + model = optimize_ctx(model) + optimized_model_iter_fn = self.model_iter_fn + else: + optimized_model_iter_fn = optimize_ctx(self.model_iter_fn) with maybe_snapshot_memory( self.args.snapshot_memory, f"compiled_{self.args.only}" @@ -2793,7 +2831,13 @@ def warmup(fn, model, example_inputs, mode, niters=5): f"{ok:3}/{total:3} +{frames_third_pass} frames {compilation_time:3.0f}s" ) - results.append(experiment(model, example_inputs, **experiment_kwargs)) + experiment_kwargs["hf_llm"] = getattr(self, "hf_llm", False) + + results.append( + experiment( + self.model_iter_fn, model, example_inputs, **experiment_kwargs + ) + ) return " ".join(map(str, results)) def minify_model( @@ -4084,7 +4128,7 @@ def model_iter_fn_and_mark_step(*args, **kwargs): # Overwrite 'translation_validation' config, if specified. 
torch.fx.experimental._config.translation_validation = False - experiment = functools.partial(experiment, args, runner.model_iter_fn) + experiment = functools.partial(experiment, args) if args.only and should_diff_branch(args): import git diff --git a/benchmarks/dynamo/huggingface.py b/benchmarks/dynamo/huggingface.py index 76026731fe890..cceb448a849c1 100755 --- a/benchmarks/dynamo/huggingface.py +++ b/benchmarks/dynamo/huggingface.py @@ -7,6 +7,7 @@ import re import subprocess import sys +import types import warnings @@ -128,6 +129,12 @@ def process_hf_reformer_output(out): assert len(BATCH_SIZE_KNOWN_MODELS) +try: + from .huggingface_llm_models import HF_LLM_MODELS +except ImportError: + from huggingface_llm_models import HF_LLM_MODELS + + def get_module_cls_by_model_name(model_cls_name): _module_by_model_name = { "Speech2Text2Decoder": "transformers.models.speech_to_text_2.modeling_speech_to_text_2", @@ -418,11 +425,8 @@ def load_model( use_eval_mode = self.args.use_eval_mode dtype = torch.float32 reset_rng_state() - model_cls, config = self._get_model_cls_and_config(model_name) - model = self._download_model(model_name) - model = model.to(device, dtype=dtype) - if self.args.enable_activation_checkpointing: - model.gradient_checkpointing_enable() + + # Get batch size if model_name in BATCH_SIZE_KNOWN_MODELS: batch_size_default = BATCH_SIZE_KNOWN_MODELS[model_name] elif batch_size is None: @@ -440,14 +444,46 @@ def load_model( f"Running smaller batch size={batch_size} for {model_name}, orig batch_size={batch_size_default}" # noqa: G004 ) - example_inputs = generate_inputs_for_model( - model_cls, model, model_name, batch_size, device, include_loss_args=True - ) + # Get model and example inputs + if model_name in HF_LLM_MODELS: + benchmark_cls = HF_LLM_MODELS[model_name] + model, example_inputs = benchmark_cls.get_model_and_inputs( + model_name, device + ) + + # Set this flag so that when we test for speedup, we use + # model.generate instead of using model.forward + self.hf_llm = True + + def generate(self, _, example_inputs, collect_outputs=True): + return model.generate(**example_inputs) - # So we can check for correct gradients without eliminating the dropout computation - for attr in dir(config): - if "drop" in attr and isinstance(getattr(config, attr), float): - setattr(config, attr, 1e-30) + self.generate = types.MethodType(generate, self) + + else: + self.hf_llm = False + + model_cls, config = self._get_model_cls_and_config(model_name) + model = self._download_model(model_name) + model = model.to(device, dtype=dtype) + + example_inputs = generate_inputs_for_model( + model_cls, model, model_name, batch_size, device, include_loss_args=True + ) + + # So we can check for correct gradients without eliminating the dropout computation + for attr in dir(config): + if "drop" in attr and isinstance(getattr(config, attr), float): + setattr(config, attr, 1e-30) + + # Turning off kv cache for torchbench models. This is not the right + # thing to do, but the pt2 dashboard is outdated. Real transformers + # benchmarks will be added soon using a different infra. + if hasattr(model, "config") and hasattr(model.config, "use_cache"): + model.config.use_cache = False + + if self.args.enable_activation_checkpointing: + model.gradient_checkpointing_enable() if ( is_training @@ -460,12 +496,6 @@ def load_model( else: model.eval() - # Turning off kv cache for torchbench models. This is not the right - # thing to do, but the pt2 dashboard is outdated. 
Real transformers - # benchmarks will be added soon using a different infra. - if hasattr(model, "config") and hasattr(model.config, "use_cache"): - model.config.use_cache = False - self.validate_model(model, example_inputs) return device, model_name, model, example_inputs, batch_size @@ -530,7 +560,8 @@ def compute_loss(self, pred): def forward_pass(self, mod, inputs, collect_outputs=True): with self.autocast(**self.autocast_arg): - return mod(**inputs) + res = mod(**inputs) + return res.logits if self.hf_llm else res def forward_and_backward_pass(self, mod, inputs, collect_outputs=True): cloned_inputs = clone_inputs(inputs) diff --git a/benchmarks/dynamo/huggingface.yaml b/benchmarks/dynamo/huggingface.yaml index 5640776117096..b45f199f4d4cf 100644 --- a/benchmarks/dynamo/huggingface.yaml +++ b/benchmarks/dynamo/huggingface.yaml @@ -9,9 +9,16 @@ skip: # Fails with even batch size = 1 - GPTJForCausalLM - GPTJForQuestionAnswering + # Model too big + - google/gemma-3-4b-it device: - cpu: [] + cpu: + - meta-llama/Llama-3.2-1B + - google/gemma-2-2b + - google/gemma-3-4b-it + - openai/whisper-tiny + - Qwen/Qwen3-0.6B control_flow: - AllenaiLongformerBase @@ -67,6 +74,11 @@ batch_size: XGLMForCausalLM: 4 XLNetLMHeadModel: 2 YituTechConvBert: 2 + meta-llama/Llama-3.2-1B: 8 + google/gemma-2-2b: 8 + google/gemma-3-4b-it: 8 + openai/whisper-tiny: 8 + Qwen/Qwen3-0.6B: 8 tolerance: diff --git a/benchmarks/dynamo/huggingface_llm_models.py b/benchmarks/dynamo/huggingface_llm_models.py new file mode 100644 index 0000000000000..c8b0524c4d63b --- /dev/null +++ b/benchmarks/dynamo/huggingface_llm_models.py @@ -0,0 +1,102 @@ +import subprocess +import sys + +import torch + + +def pip_install(package): + subprocess.check_call([sys.executable, "-m", "pip", "install", package]) + + +try: + from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + WhisperForConditionalGeneration, + WhisperProcessor, + ) +except ModuleNotFoundError: + print("Installing HuggingFace Transformers...") + pip_install("git+https://github.com/huggingface/transformers.git#egg=transformers") +finally: + from transformers import ( + AutoModelForCausalLM, + AutoTokenizer, + WhisperForConditionalGeneration, + WhisperProcessor, + ) + + +class Benchmark: + @staticmethod + def get_model_and_inputs(model_name, device): + raise NotImplementedError("get_model_and_inputs() not implemented") + + +class WhisperBenchmark(Benchmark): + SAMPLE_RATE = 16000 + DURATION = 30.0 # seconds + + @staticmethod + def get_model_and_inputs(model_name, device): + processor = WhisperProcessor.from_pretrained(model_name) + model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device) + model.config.forced_decoder_ids = None + + model.generation_config.do_sample = False + model.generation_config.temperature = 0.0 + + num_samples = int(WhisperBenchmark.DURATION * WhisperBenchmark.SAMPLE_RATE) + audio = torch.randn(num_samples) * 0.1 + inputs = dict( + processor( + audio, sampling_rate=WhisperBenchmark.SAMPLE_RATE, return_tensors="pt" + ) + ) + inputs["input_features"] = inputs["input_features"].to(device) + + decoder_start_token = model.config.decoder_start_token_id + inputs["decoder_input_ids"] = torch.tensor( + [[decoder_start_token]], device=device + ) + + return model, inputs + + +class TextGenerationBenchmark(Benchmark): + INPUT_LENGTH = 1000 + OUTPUT_LENGTH = 2000 + + @staticmethod + def get_model_and_inputs(model_name, device): + tokenizer = AutoTokenizer.from_pretrained(model_name) + model = 
AutoModelForCausalLM.from_pretrained(model_name, device_map=device) + model.eval() + + model.generation_config.do_sample = False + model.generation_config.use_cache = True + model.generation_config.cache_implementation = "static" + model.generation_config.max_new_tokens = TextGenerationBenchmark.OUTPUT_LENGTH + model.generation_config.pad_token_id = tokenizer.eos_token_id + model.generation_config.temperature = 0.0 + + vocab_size = tokenizer.vocab_size + input_ids = torch.randint( + low=0, + high=vocab_size, + size=(1, TextGenerationBenchmark.INPUT_LENGTH), + device=device, + dtype=torch.long, + ) + example_inputs = {"input_ids": input_ids} + + return model, example_inputs + + +HF_LLM_MODELS: dict[str, Benchmark] = { + "meta-llama/Llama-3.2-1B": TextGenerationBenchmark, + "google/gemma-2-2b": TextGenerationBenchmark, + "google/gemma-3-4b-it": TextGenerationBenchmark, + "openai/whisper-tiny": WhisperBenchmark, + "Qwen/Qwen3-0.6B": TextGenerationBenchmark, +} diff --git a/benchmarks/dynamo/huggingface_models_list.txt b/benchmarks/dynamo/huggingface_models_list.txt index 12ceedd5c4ccc..0a6327ae1aad2 100644 --- a/benchmarks/dynamo/huggingface_models_list.txt +++ b/benchmarks/dynamo/huggingface_models_list.txt @@ -46,3 +46,8 @@ TrOCRForCausalLM,64 XGLMForCausalLM,32 XLNetLMHeadModel,16 YituTechConvBert,32 +meta-llama/Llama-3.2-1B,8 +google/gemma-2-2b,8 +google/gemma-3-4b-it,8 +openai/whisper-tiny,8 +Qwen/Qwen3-0.6B,8 From 6d64bc399073744fbc12dd4b6b8ed95c72bae9ba Mon Sep 17 00:00:00 2001 From: Clark Kang Date: Sun, 14 Sep 2025 08:09:08 +0000 Subject: [PATCH 229/693] [data foundation][vizard] Prevent checking the device type of numpy object in Tensorboard logger (#162888) Summary: The check is introduced in D82262053 - `scalar_value` could be a numpy object - Move the check of `device.type` into `make_np` method where it happens only when it's a `torch.Tensor`. Test Plan: ``` vizard launch -j 1x8 --launch=flow --config-path=pkg://vizard_projects.image_classification.configs --config-name=resnet50 ++flow.secure_group=ml_sensors ++flow.entitlement=ai_frameworks_pnb ++max_train_steps_per_epoch=10 ++max_epochs=5 ++log_every_n_steps=10 ++profiler=null ++max_eval_steps_per_epoch=10 ``` Rollback Plan: Differential Revision: D82383428 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162888 Approved by: https://github.com/xush6528 --- torch/utils/tensorboard/_convert_np.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch/utils/tensorboard/_convert_np.py b/torch/utils/tensorboard/_convert_np.py index 4e20ec6337c36..afa801343334b 100644 --- a/torch/utils/tensorboard/_convert_np.py +++ b/torch/utils/tensorboard/_convert_np.py @@ -20,6 +20,8 @@ def make_np(x: torch.Tensor) -> np.ndarray: if np.isscalar(x): return np.array([x]) if isinstance(x, torch.Tensor): + if x.device.type == "meta": + return np.random.randn(1) return _prepare_pytorch(x) raise NotImplementedError( f"Got {type(x)}, but numpy array or torch tensor are expected." From 3f8a2e62ea883766d56b5c82bc5b24fd04c4770e Mon Sep 17 00:00:00 2001 From: Shen Zhang Date: Sun, 14 Sep 2025 17:07:14 +0000 Subject: [PATCH 230/693] Fix rebind_unbacked in torch.fx.experimental.symbolic_shapes (#162788) ## Description Fix a float type handling in `torch.fx.experimental.symbolic_shapes` function. [#162480](https://github.com/pytorch/pytorch/issues/162480) ## Issue When I use AOTInductor to compile the YOLOv10, I encounter the bug `'float' object has no attribute 'node'`. 
[Torch AOTInductor Ahead-Of-Time Compilation Fail](https://github.com/opendatalab/DocLayout-YOLO/issues/177) The problem is due to missing float type handling. https://github.com/pytorch/pytorch/blob/main/torch/fx/experimental/symbolic_shapes.py#L597 ``` if isinstance(u1, int): log.info( "rebind_unbacked: discard %s %s %s -> %s", n.target, raw_u0, path, u1, ) continue ``` ## Solution Change the code `if isinstance(u1, int)` to `if isinstance(u1, (int, float))` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162788 Approved by: https://github.com/ezyang --- torch/fx/experimental/symbolic_shapes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index b9a779d2e4635..a06568101705f 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -594,7 +594,7 @@ def rebind_unbacked( # exist in the ShapeEnv but are never bound anywhere. You might # like an invariant that unbacked symbols never get lost. But # we do not have this invariant, so do not try to enforce it. - if isinstance(u1, int): + if isinstance(u1, (int, float)): log.info( "rebind_unbacked: discard %s %s %s -> %s", n.target, From aff24385548bc9377df28f0461fdc8d02c9cfe1c Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Sat, 13 Sep 2025 23:31:54 -0400 Subject: [PATCH 231/693] QoL: add pip to requirements-build.txt (#162896) uv venvs by default don't come with pip, but, for example, setup.py assumes it is available. Signed-off-by: Edward Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162896 Approved by: https://github.com/Skylion007 --- requirements-build.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-build.txt b/requirements-build.txt index be19d987f73db..2009ddb658ed8 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -8,3 +8,4 @@ pyyaml requests six # dependency chain: NNPACK -> PeachPy -> six typing-extensions>=4.10.0 +pip # not technically needed, but this makes setup.py invocation work From 8e1db46493fb37ef932266cbc892f2da8593295e Mon Sep 17 00:00:00 2001 From: Isalia20 Date: Sun, 14 Sep 2025 17:47:06 +0000 Subject: [PATCH 232/693] [MPS] enable empty like and unsqueeze for SparseMPS (#162910) Enable empty like and unsqueeze for SparseMPS Pull Request resolved: https://github.com/pytorch/pytorch/pull/162910 Approved by: https://github.com/malfet, https://github.com/Skylion007 --- aten/src/ATen/native/native_functions.yaml | 4 ++-- test/test_sparse.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index 4a5c4ac51558b..a67dcc9aef850 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -2517,7 +2517,7 @@ dispatch: CompositeExplicitAutograd: empty_like QuantizedCPU, QuantizedCUDA: empty_like_quantized - SparseCPU, SparseCUDA, SparseMeta: empty_like_sparse_coo + SparseCPU, SparseCUDA, SparseMPS, SparseMeta: empty_like_sparse_coo SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: empty_like_sparse_csr NestedTensorCPU, NestedTensorHPU, NestedTensorCUDA: empty_like_nested autogen: empty_like.out @@ -6492,7 +6492 @@ device_guard: False dispatch: CompositeExplicitAutograd: unsqueeze - SparseCPU, SparseCUDA: unsqueeze_sparse + SparseCPU, SparseCUDA, SparseMPS: unsqueeze_sparse QuantizedCPU, QuantizedCUDA: unsqueeze_quantized NestedTensorCPU, NestedTensorHPU, 
NestedTensorCUDA: unsqueeze_nested tags: core diff --git a/test/test_sparse.py b/test/test_sparse.py index 2e197d9546721..9127a7a75fb9a 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -1164,9 +1164,9 @@ def test_shapes(shapes, dim, fail_message=None): "Concatenating sparse tensors, but a dense tensor was found at position 1."): torch.cat((sp, dn)) - @expectedFailureMPS @coalescedonoff @dtypes(torch.double, torch.cdouble) + @dtypesIfMPS(torch.float32, torch.complex64) def test_unsqueeze(self, device, dtype, coalesced): def test_shape(sparse_dims, nnz, sizes, unsqueeze_dim, fail_message=None): x, _, _ = self._gen_sparse(sparse_dims, nnz, sizes, dtype, device, coalesced) @@ -2353,14 +2353,14 @@ def _test_empty_like(self, sparse_tensor, dtype, device, coalesced): self.assertTrue(result.layout == torch.strided) with self.assertRaisesRegex( - RuntimeError, r"Could not run 'aten::empty_strided' with arguments from the 'Sparse(CPU|CUDA)' backend" + RuntimeError, r"Could not run 'aten::empty_strided' with arguments from the 'Sparse(CPU|CUDA|MPS)' backend" ): dense_tensor = sparse_tensor.to_dense() result = torch.empty_like(dense_tensor, layout=torch.sparse_coo) @coalescedonoff - @expectedFailureMPS @dtypes(torch.double, torch.cdouble) + @dtypesIfMPS(torch.float32, torch.complex64) def test_empty_like(self, device, dtype, coalesced): # tests https://github.com/pytorch/pytorch/issues/43699 From ba5ca31676f5461c00fd2b99d8a801faba1fbbea Mon Sep 17 00:00:00 2001 From: Isalia20 Date: Sun, 14 Sep 2025 18:57:53 +0000 Subject: [PATCH 233/693] [MPS] sparse mps any (#162885) Add SparseMPS key for any op Pull Request resolved: https://github.com/pytorch/pytorch/pull/162885 Approved by: https://github.com/malfet, https://github.com/Skylion007 --- aten/src/ATen/native/native_functions.yaml | 2 +- test/test_sparse.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index a67dcc9aef850..45c69690c9563 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -10259,7 +10259,7 @@ structured_delegate: any.all_out variants: method, function dispatch: - SparseCPU, SparseCUDA: any_sparse + SparseCPU, SparseCUDA, SparseMPS: any_sparse tags: core - func: any.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
diff --git a/test/test_sparse.py b/test/test_sparse.py index 9127a7a75fb9a..9e7c797d38087 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -3310,7 +3310,6 @@ def test_pickle(self, device, dtype, coalesced): sp_tensor_loaded = pickle.loads(serialized) self.assertEqual(sp_tensor, sp_tensor_loaded) - @expectedFailureMPS def test_any(self, device): t = torch.sparse_coo_tensor(torch.tensor(([0, 0], [2, 0])), torch.tensor([False, False]), device=device) t_any = torch.tensor(False) From e156a071712447a8f6930ce49785bab24549637c Mon Sep 17 00:00:00 2001 From: James Wu Date: Sun, 14 Sep 2025 13:41:55 -0700 Subject: [PATCH 234/693] [Precompile] [RFC] Implement aot_compile_module (#162171) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds a new interface _aot_compile to `OptimizedModule`, so that the following is possible: ``` mod = SimpleLinearModule() inputs = [ ModelInput( args=(torch.randn(3, 3),), kwargs={}, contexts=[torch.no_grad(), eval_mode(model)], ), ModelInput( args=(torch.randn(3, 3),), kwargs={}, contexts=[train_mode(model)] ), ] assert isinstance(model, torch._dynamo.eval_frame.OptimizedModule) model._aot_compile( inputs, ) ``` After this PR, you can AOT precompile NanoGPT and use it to train directly. I'll share my fork of the repo to make this work. ## ModelInput The `ModelInput` API is a work in progress; for now it represents a set of inputs and contexts to instruct the compiler to compile. Most commonly, this is "compile an eval mode with no grad, and a training mode with grad", but it can also contain things like autocasting contexts, etc. ## Dispatch Dispatching is super simple here: we just iterate through all the precompiled fullgraphs and check guards for each one until there's one that passes. I'm a bit worried that having this in Python code is going to be too expensive. The guard checks happen in C++ anyway, so the only Python-bottlenecked step is the for loop itself; perhaps the overhead will not be high. I'll work on measuring this, though. ## TODOs This PR does not support `mod.compile()`, only `torch.compile(mod)`. In order to support `mod.compile()`, we'll need to update the torch.nn.Module implementation; I can add that frontend later. 
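Below is a minimal sketch of the save/load round trip exercised by the new test, using the `_aot_compile`, `_save_aot_compiled_module`, and `_load_aot_compiled_module` entry points added in this PR. The module and inputs are illustrative placeholders, and it assumes `torch._dynamo.config.enable_aot_compile` is enabled:
```python
import torch
from torch._dynamo.aot_compile import ModelInput

torch._dynamo.config.enable_aot_compile = True  # required; _aot_compile raises otherwise

# Illustrative module; any nn.Module compiled with fullgraph=True works the same way.
mod = torch.nn.Linear(3, 3)
model = torch.compile(mod, fullgraph=True, backend="inductor")

# One full graph is compiled per ModelInput; guards dispatch between them at call time.
model._aot_compile([ModelInput(args=(torch.randn(3, 3),), kwargs={}, contexts=[])])

# Serialize the compiled graphs, then restore them onto a freshly compiled module.
data = model._save_aot_compiled_module()
torch._dynamo.reset()
model = torch.compile(mod, fullgraph=True, backend="inductor")
model._load_aot_compiled_module(data)
out = model(torch.randn(3, 3))
```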
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162171 Approved by: https://github.com/zhxchen17 --- test/dynamo/test_aot_compile.py | 81 +++++++++++++++ torch/_dynamo/aot_compile.py | 172 ++++++++++++++++++++++++------- torch/_dynamo/eval_frame.py | 51 +++++++++ torch/_functorch/aot_autograd.py | 1 + torch/_functorch/config.py | 5 + 5 files changed, 271 insertions(+), 39 deletions(-) diff --git a/test/dynamo/test_aot_compile.py b/test/dynamo/test_aot_compile.py index 6589428bda6c6..9c72c86fef5c1 100644 --- a/test/dynamo/test_aot_compile.py +++ b/test/dynamo/test_aot_compile.py @@ -2,6 +2,7 @@ import os import pickle +from contextlib import contextmanager import torch import torch._dynamo.testing @@ -9,6 +10,7 @@ import torch._inductor.test_case import torch.onnx.operators import torch.utils.cpp_extension +from torch._dynamo.aot_compile import ModelInput from torch._dynamo.exc import PackageError, Unsupported from torch._dynamo.package import DynamoCache from torch._dynamo.precompile_context import PrecompileContext @@ -226,6 +228,85 @@ def fn(x, y): actual = compiled_fn(*inputs) self.assertEqual(expected, actual) + def test_aot_compile_module(self): + mod = SimpleLinearModule() + + model = torch.compile( + mod, + fullgraph=True, + backend="inductor", + options={ + "guard_filter_fn": torch.compiler.skip_guard_on_globals_unsafe, + }, + ) + + @contextmanager + def train_mode(model): + """ + Context manager that sets the model to training mode before entering the context. + """ + model.train() + yield + + @contextmanager + def eval_mode(model): + """ + Context manager that sets the model to evaluation mode before entering the context. + """ + model.eval() + yield + + inputs = [ + ModelInput( + args=(torch.randn(3, 3),), + kwargs={}, + contexts=[torch.no_grad(), eval_mode(model)], + ), + ModelInput( + args=(torch.randn(3, 3),), kwargs={}, contexts=[train_mode(model)] + ), + ] + assert isinstance(model, torch._dynamo.eval_frame.OptimizedModule) + model._aot_compile( + inputs, + ) + with torch.compiler.set_stance("fail_on_recompile"): + model.eval() + inputs = (torch.randn(3, 3),) + expected = mod(*inputs) + actual = model(*inputs) + self.assertEqual(expected, actual) + + # Shouldn't recompile + model.train() + expected.sum().backward() + + model._save_aot_compiled_module(self.path()) + torch._dynamo.reset() + model = torch.compile( + mod, + fullgraph=True, + backend="inductor", + options={ + "guard_filter_fn": torch.compiler.skip_guard_on_globals_unsafe, + }, + ) + assert isinstance(model, torch._dynamo.eval_frame.OptimizedModule) + with open(self.path(), "rb") as f: + data = f.read() + model._load_aot_compiled_module(data) + + with torch.compiler.set_stance("fail_on_recompile"): + model.eval() + inputs = (torch.randn(3, 3),) + expected = mod(*inputs) + actual = model(*inputs) + self.assertEqual(expected, actual) + + # Shouldn't recompile + model.train() + expected.sum().backward() + if __name__ == "__main__": from torch._dynamo.test_case import run_tests diff --git a/torch/_dynamo/aot_compile.py b/torch/_dynamo/aot_compile.py index 980c230b7bccd..c93f9cc397f03 100644 --- a/torch/_dynamo/aot_compile.py +++ b/torch/_dynamo/aot_compile.py @@ -6,6 +6,7 @@ import logging import pickle import types +from contextlib import AbstractContextManager, ExitStack from dataclasses import dataclass from typing import Any, Callable, Optional @@ -264,48 +265,141 @@ def new_guard_filter_fn( assert check_fn.guards_state is not None - backend_input = capture_output.backend_input - assert 
backend_input is not None - backend_input.graph_module._backend_id = backend_input.backend_id # type: ignore[assignment] - output_graph = dynamo_output.tracer_output.output_graph - assert output_graph is not None - use_cuda = _graph_uses_non_cpu(output_graph.current_tracer.graph) + backend_input = capture_output.backend_input + assert backend_input is not None + backend_input.graph_module._backend_id = backend_input.backend_id # type: ignore[assignment] + output_graph = dynamo_output.tracer_output.output_graph + assert output_graph is not None + use_cuda = _graph_uses_non_cpu(output_graph.current_tracer.graph) + import_sources = output_graph.import_sources + with ( + torch._guards.tracing(TracingContext(backend_input.fake_mode)), + torch._functorch.config.patch( + { + "bundled_autograd_cache": True, + "force_non_lazy_backward_lowering": True, + } + ), + ): + compiled_fn = backend( + backend_input.graph_module, backend_input.example_inputs + ) - import_sources = output_graph.import_sources - with ( - torch._guards.tracing(TracingContext(backend_input.fake_mode)), - torch._functorch.config.patch("bundled_autograd_cache", True), - ): - compiled_fn = backend(backend_input.graph_module, backend_input.example_inputs) + # If Inductor backend is used, grab the compiled_fn from PrecompileContext + # TODO: this should be replaced once we make the backend return the SerializableCallable directly. + if isinstance(backend, torch._TorchCompileInductorWrapper): + compiled_fn = BundledAOTAutogradSerializableCallable.from_backend_id( + backend_input.backend_id + ) - # If Inductor backend is used, grab the compiled_fn from PrecompileContext - # TODO: this should be replaced once we make the backend return the SerializableCallable directly. - if isinstance(backend, torch._TorchCompileInductorWrapper): - compiled_fn = BundledAOTAutogradSerializableCallable.from_backend_id( - backend_input.backend_id - ) + if not isinstance(compiled_fn, SerializableCallable): + if hasattr(backend, "compiler_fn"): + compiler_fn = backend.compiler_fn + else: + compiler_fn = backend + raise RuntimeError( + f"Compiled function type {type(compiled_fn)} (produced " + + f"from backend {compiler_fn}) does not implement SerializableCallable." + ) - if not isinstance(compiled_fn, SerializableCallable): - if hasattr(backend, "compiler_fn"): - compiler_fn = backend.compiler_fn - else: - compiler_fn = backend - raise RuntimeError( - f"Compiled function type {type(compiled_fn)} (produced " - + f"from backend {compiler_fn}) does not implement SerializableCallable." 
+ artifacts = CompileArtifacts( + signature=signature, + bytecode=dynamo_output.bytecode, + guard_manager=check_fn.guard_manager, + guards_state=check_fn.guards_state, + import_sources=import_sources, + backend_id=backend_input.backend_id, + compiled_fn=compiled_fn, + original_code=fn.__code__, + closure=fn.__closure__, + use_cuda=use_cuda, ) + aot_compiled_fn = AOTCompiledFunction(_artifacts=artifacts) - artifacts = CompileArtifacts( - signature=signature, - bytecode=dynamo_output.bytecode, - guard_manager=check_fn.guard_manager, - guards_state=check_fn.guards_state, - import_sources=import_sources, - backend_id=backend_input.backend_id, - compiled_fn=compiled_fn, - original_code=fn.__code__, - closure=fn.__closure__, - use_cuda=use_cuda, - ) - aot_compiled_fn = AOTCompiledFunction(_artifacts=artifacts) return aot_compiled_fn + + +@dataclass +class ModelInput: + """ + WIP type: represents a single model input + Which consists of a tuple of arguments and a set of contexts in which to run the model. + + For each ModelInput, we'll compile one full graph of the model, and then use the guards generated + to dispatch between the compiled graphs. + + + """ + + args: tuple[Any] + kwargs: dict[str, Any] + contexts: list[AbstractContextManager[Any]] + + +@dataclass +class AOTCompiledModel: + # Represents a single forward function of a model along with dispatch + # compiled_results is serializable. We require the model to deserialize again. + model: torch.nn.Module + compiled_results: list[AOTCompiledFunction] + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + for result in self.compiled_results: + if result.guard_check(self.model, *args, **kwargs): + return result(self.model, *args, **kwargs) + # All guards failed, just run one of them and throw the guard check error. + return self.compiled_results[0](self.model, *args, **kwargs) + + def serialize(self) -> bytes: + data: list[bytes] = [] + for result in self.compiled_results: + data.append(AOTCompiledFunction.serialize(result)) + return pickle.dumps(data) + + @classmethod + def deserialize(cls, model: torch.nn.Module, data: bytes) -> "AOTCompiledModel": + from torch._dynamo.utils import get_metrics_context + from torch._guards import compile_context, CompileContext + + results: list[bytes] = pickle.loads(data) + compiled_results = [] + for result in results: + with ( + compile_context(CompileContext(convert_frame.get_compile_id({}))), + get_metrics_context(), + ): + compiled_results.append(AOTCompiledFunction.deserialize(result)) + return cls(model, compiled_results) + + +def aot_compile_module( + model: torch.nn.Module, + inputs: list[ModelInput], + hooks: Hooks, + backend: Callable[[torch.fx.GraphModule, list[torch.Tensor]], SerializableCallable], +) -> AOTCompiledModel: + """ + Compiles a single nn.Module with any number of inputs, and returns a compiled forward function. 
+ """ + + def compile_single_graph(model_input: ModelInput) -> AOTCompiledFunction: + example_inputs = (model_input.args, model_input.kwargs) + orig_forward = model.forward + with ExitStack() as stack: + for ctx in model_input.contexts: + stack.enter_context(ctx) + return aot_compile_fullgraph( + orig_forward, + example_inputs, + hooks=hooks, + backend=backend, + ) + + compiled_results = [] + for model_input in inputs: + log.info("Compiling input %s..", model_input) + compiled_results.append(compile_single_graph(model_input)) + + assert len(compiled_results) > 0 + + return AOTCompiledModel(model, compiled_results) diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py index 177541e8f3341..22e16f168565f 100644 --- a/torch/_dynamo/eval_frame.py +++ b/torch/_dynamo/eval_frame.py @@ -413,6 +413,57 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any: ) return super().__call__(*args, **kwargs) + def _aot_compile(self, inputs: list[torch._dynamo.aot_compile.ModelInput]) -> None: + """ + Experimental: AOT Compile a set of inputs and use that as the forward function + """ + model = self._orig_mod + hooks = self.dynamo_ctx._hooks + assert hooks is not None + if not config.enable_aot_compile: + raise RuntimeError( + "AOT Compile is not enabled, please set torch._dynamo.config.enable_aot_config=True" + ) + if not self.dynamo_ctx.fullgraph: + raise RuntimeError( + "Graph breaks are not supported with aot compile. Please use torch.compile(fullgraph=True)." + ) + + if not callable(self.dynamo_ctx.callback): + raise RuntimeError("aot compile requires a callable dynamo callback.") + + backend = innermost_fn( + self.dynamo_ctx.callback, unaltered_fn_attr="_torchdynamo_orig_backend" + ) + from torch._dynamo.aot_compile import aot_compile_module + + self.forward = aot_compile_module(model, inputs, hooks, backend) + + def _save_aot_compiled_module(self, path: Optional[str] = None) -> bytes: + if not config.enable_aot_compile: + raise RuntimeError( + "AOT Compile is not enabled, please set torch._dynamo.config.enable_aot_config=True" + ) + from torch._dynamo.aot_compile import AOTCompiledModel + + assert isinstance(self.forward, AOTCompiledModel) + result: bytes = self.forward.serialize() + if path is not None: + with open(path, "wb") as f: + f.write(result) + return result + + def _load_aot_compiled_module(self, data: bytes) -> None: + if not config.enable_aot_compile: + raise RuntimeError( + "AOT Compile is not enabled, please set torch._dynamo.config.enable_aot_config=True" + ) + from torch._dynamo.aot_compile import AOTCompiledModel + + compiled_forward = AOTCompiledModel.deserialize(self._orig_mod, data) + assert isinstance(compiled_forward, AOTCompiledModel) + self.forward = compiled_forward + def __reduce__( self, ) -> tuple[type[OptimizedModule], tuple[torch.nn.Module, _TorchDynamoContext]]: diff --git a/torch/_functorch/aot_autograd.py b/torch/_functorch/aot_autograd.py index 1e0cb6a2ef8be..3215472292346 100644 --- a/torch/_functorch/aot_autograd.py +++ b/torch/_functorch/aot_autograd.py @@ -1072,6 +1072,7 @@ def aot_module_simplified( boxed_forward_device_index, ignore_shape_env, flatten=False, + force_non_lazy_backward_lowering=config.force_non_lazy_backward_lowering, ) compiled_fn = None diff --git a/torch/_functorch/config.py b/torch/_functorch/config.py index 5bf2dee3e1d7d..d53480e0e5113 100644 --- a/torch/_functorch/config.py +++ b/torch/_functorch/config.py @@ -296,6 +296,11 @@ def remote_autograd_cache_default() -> Optional[bool]: # TODO: turn on by default 
graphsafe_rng_functionalization = True +# Whether or not to eagerly compile the backward +# used by AOT compile and other settings +# TODO: once AOT compile calls aot autograd directly instead of +# through compile_fx, we can remove this +force_non_lazy_backward_lowering = False # Error on BypassAOTAutogradCache instead of just a warning # Used for tests From 7fe1f5ea49798a51895f5f58f5c7da62abe3f1af Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sun, 14 Sep 2025 14:41:14 -0700 Subject: [PATCH 235/693] [BE] Delete [Ventura|Sonoma]Ops header (#162921) Was a temp solution to make PyTorch+MPS buildable on MacOS-12, but it's no longer needed, as in 2.9+ MPS is only supported on MacOS Sonoma+ Pull Request resolved: https://github.com/pytorch/pytorch/pull/162921 Approved by: https://github.com/Skylion007, https://github.com/dcci --- aten/src/ATen/native/mps/MPSGraphSonomaOps.h | 48 ----- aten/src/ATen/native/mps/MPSGraphVenturaOps.h | 196 ------------------ aten/src/ATen/native/mps/OperationUtils.mm | 2 - .../native/mps/operations/BinaryKernel.mm | 2 - .../ATen/native/mps/operations/Convolution.mm | 18 +- aten/src/ATen/native/mps/operations/Copy.mm | 1 - .../native/mps/operations/Distributions.mm | 2 - .../mps/operations/FastFourierTransform.mm | 16 -- .../ATen/native/mps/operations/GridSampler.mm | 1 - .../ATen/native/mps/operations/Indexing.mm | 1 - .../native/mps/operations/LinearAlgebra.mm | 2 - .../ATen/native/mps/operations/ReduceOps.mm | 1 - aten/src/ATen/native/mps/operations/Shape.mm | 1 - aten/src/ATen/native/mps/operations/Sort.mm | 1 - .../ATen/native/mps/operations/UnaryOps.mm | 2 - aten/src/ATen/native/mps/operations/Unique.mm | 1 - .../ATen/native/mps/operations/UpSample.mm | 1 - aten/src/ATen/native/mps/operations/View.mm | 2 - 18 files changed, 2 insertions(+), 296 deletions(-) delete mode 100644 aten/src/ATen/native/mps/MPSGraphSonomaOps.h delete mode 100644 aten/src/ATen/native/mps/MPSGraphVenturaOps.h diff --git a/aten/src/ATen/native/mps/MPSGraphSonomaOps.h b/aten/src/ATen/native/mps/MPSGraphSonomaOps.h deleted file mode 100644 index 6290245083a44..0000000000000 --- a/aten/src/ATen/native/mps/MPSGraphSonomaOps.h +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once - -#include - -#if !defined(__MAC_14_0) && (!defined(MAC_OS_X_VERSION_14_0) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_14_0)) - -typedef NS_ENUM(NSUInteger, MPSGraphFFTScalingMode) { - MPSGraphFFTScalingModeNone = 0L, - MPSGraphFFTScalingModeSize = 1L, - MPSGraphFFTScalingModeUnitary = 2L, -}; - -@interface FakeMPSGraphFFTDescriptor : NSObject -@property(readwrite, nonatomic) BOOL inverse; -@property(readwrite, nonatomic) MPSGraphFFTScalingMode scalingMode; -@property(readwrite, nonatomic) BOOL roundToOddHermitean; -+ (nullable instancetype)descriptor; -@end - -@compatibility_alias MPSGraphFFTDescriptor FakeMPSGraphFFTDescriptor; - -@interface MPSGraph (SonomaOps) -- (MPSGraphTensor* _Nonnull)conjugateWithTensor:(MPSGraphTensor* _Nonnull)tensor name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)realPartOfTensor:(MPSGraphTensor* _Nonnull)tensor name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)fastFourierTransformWithTensor:(MPSGraphTensor* _Nonnull)tensor - axes:(NSArray* _Nonnull)axes - descriptor:(MPSGraphFFTDescriptor* _Nonnull)descriptor - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)realToHermiteanFFTWithTensor:(MPSGraphTensor* _Nonnull)tensor - axes:(NSArray* _Nonnull)axes - descriptor:(MPSGraphFFTDescriptor* _Nonnull)descriptor - name:(NSString* _Nullable)name; 
- -- (MPSGraphTensor* _Nonnull)HermiteanToRealFFTWithTensor:(MPSGraphTensor* _Nonnull)tensor - axes:(NSArray* _Nonnull)axes - descriptor:(MPSGraphFFTDescriptor* _Nonnull)descriptor - name:(NSString* _Nullable)name; -@end - -// define BFloat16 enums for MacOS13 -#define MPSDataTypeBFloat16 ((MPSDataType)(MPSDataTypeAlternateEncodingBit | MPSDataTypeFloat16)) - -// define Metal version -#define MTLLanguageVersion3_1 ((MTLLanguageVersion)((3 << 16) + 1)) -#endif diff --git a/aten/src/ATen/native/mps/MPSGraphVenturaOps.h b/aten/src/ATen/native/mps/MPSGraphVenturaOps.h deleted file mode 100644 index 5497c83f7b9a6..0000000000000 --- a/aten/src/ATen/native/mps/MPSGraphVenturaOps.h +++ /dev/null @@ -1,196 +0,0 @@ -#pragma once -#include - -// TODO: Remove me when moved to MacOS 13 -#if !defined(__MAC_13_2) && (!defined(MAC_OS_X_VERSION_13_2) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_13_2)) - -@interface FakeMPSGraphConvolution3DOpDescriptor : NSObject - -@property(readwrite, nonatomic) NSUInteger strideInX; -@property(readwrite, nonatomic) NSUInteger strideInY; -@property(readwrite, nonatomic) NSUInteger strideInZ; -@property(readwrite, nonatomic) NSUInteger dilationRateInX; -@property(readwrite, nonatomic) NSUInteger dilationRateInY; -@property(readwrite, nonatomic) NSUInteger dilationRateInZ; - -@property(readwrite, nonatomic) NSUInteger paddingLeft; -@property(readwrite, nonatomic) NSUInteger paddingRight; -@property(readwrite, nonatomic) NSUInteger paddingTop; -@property(readwrite, nonatomic) NSUInteger paddingBottom; -@property(readwrite, nonatomic) NSUInteger paddingFront; -@property(readwrite, nonatomic) NSUInteger paddingBack; - -@property(readwrite, nonatomic) MPSGraphPaddingStyle paddingStyle; -@property(readwrite, nonatomic) MPSGraphTensorNamedDataLayout dataLayout; -@property(readwrite, nonatomic) MPSGraphTensorNamedDataLayout weightsLayout; - -@property(readwrite, nonatomic) NSUInteger groups; - -@end - -@compatibility_alias MPSGraphConvolution3DOpDescriptor FakeMPSGraphConvolution3DOpDescriptor; - -#endif - -@interface MPSGraph (VenturaOps) - -#if !defined(__MAC_13_0) && (!defined(MAC_OS_X_VERSION_13_0) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_13_0)) - -typedef NS_ENUM(NSUInteger, MPSGraphResizeNearestRoundingMode) { - MPSGraphResizeNearestRoundingModeRoundPreferCeil = 0L, - MPSGraphResizeNearestRoundingModeRoundPreferFloor = 1L, - MPSGraphResizeNearestRoundingModeCeil = 2L, - MPSGraphResizeNearestRoundingModeFloor = 3L, - MPSGraphResizeNearestRoundingModeRoundToEven = 4L, - MPSGraphResizeNearestRoundingModeRoundToOdd = 5L, -}; - -// Define complex enums for MacOS 12 -#define MPSDataTypeComplexBit 0x01000000 -#define MPSDataTypeComplexFloat32 ((MPSDataType)(MPSDataTypeFloatBit | MPSDataTypeComplexBit | 64)) -#define MPSDataTypeComplexFloat16 ((MPSDataType)(MPSDataTypeFloatBit | MPSDataTypeComplexBit | 32)) -#endif - -- (MPSGraphTensor* _Nonnull)convolution3DWithSourceTensor:(MPSGraphTensor* _Nonnull)source - weightsTensor:(MPSGraphTensor* _Nonnull)weights - descriptor:(MPSGraphConvolution3DOpDescriptor* _Nonnull)descriptor - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull) - convolution3DDataGradientWithIncomingGradientTensor:(MPSGraphTensor* _Nonnull)incomingGradient - weightsTensor:(MPSGraphTensor* _Nonnull)weights - outputShape:(MPSShape* _Nonnull)outputShape - forwardConvolutionDescriptor: - (MPSGraphConvolution3DOpDescriptor* _Nonnull)forwardConvolutionDescriptor - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull) - 
convolution3DWeightsGradientWithIncomingGradientTensor:(MPSGraphTensor* _Nonnull)incomingGradient - sourceTensor:(MPSGraphTensor* _Nonnull)source - outputShape:(MPSShape* _Nonnull)outputShape - forwardConvolutionDescriptor: - (MPSGraphConvolution3DOpDescriptor* _Nonnull)forwardConvolutionDescriptor - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)cumulativeSumWithTensor:(MPSGraphTensor* _Nonnull)tensor - axis:(NSInteger)axis - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)sortWithTensor:(MPSGraphTensor* _Nonnull)tensor - axis:(NSInteger)axis - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)sortWithTensor:(MPSGraphTensor* _Nonnull)tensor - axis:(NSInteger)axis - descending:(BOOL)descending - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)sortWithTensor:(MPSGraphTensor* _Nonnull)tensor - axisTensor:(MPSGraphTensor* _Nonnull)axisTensor - descending:(BOOL)descending - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)sortWithTensor:(MPSGraphTensor* _Nonnull)tensor - axisTensor:(MPSGraphTensor* _Nonnull)axisTensor - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)argSortWithTensor:(MPSGraphTensor* _Nonnull)tensor - axis:(NSInteger)axis - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)argSortWithTensor:(MPSGraphTensor* _Nonnull)tensor - axis:(NSInteger)axis - descending:(BOOL)descending - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)argSortWithTensor:(MPSGraphTensor* _Nonnull)tensor - axisTensor:(MPSGraphTensor* _Nonnull)axisTensor - descending:(BOOL)descending - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)argSortWithTensor:(MPSGraphTensor* _Nonnull)tensor - axisTensor:(MPSGraphTensor* _Nonnull)axisTensor - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)inverseOfTensor:(MPSGraphTensor* _Nonnull)inputTensor name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)resizeNearestWithTensor:(MPSGraphTensor* _Nonnull)imagesTensor - sizeTensor:(MPSGraphTensor* _Nonnull)size - nearestRoundingMode:(MPSGraphResizeNearestRoundingMode)nearestRoundingMode - centerResult:(BOOL)centerResult - alignCorners:(BOOL)alignCorners - layout:(MPSGraphTensorNamedDataLayout)layout - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)resizeNearestWithTensor:(MPSGraphTensor* _Nonnull)imagesTensor - sizeTensor:(MPSGraphTensor* _Nonnull)size - scaleOffsetTensor:(MPSGraphTensor* _Nonnull)scaleOffset - nearestRoundingMode:(MPSGraphResizeNearestRoundingMode)nearestRoundingMode - layout:(MPSGraphTensorNamedDataLayout)layout - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)resizeBilinearWithTensor:(MPSGraphTensor* _Nonnull)imagesTensor - sizeTensor:(MPSGraphTensor* _Nonnull)size - centerResult:(BOOL)centerResult - alignCorners:(BOOL)alignCorners - layout:(MPSGraphTensorNamedDataLayout)layout - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)resizeBilinearWithTensor:(MPSGraphTensor* _Nonnull)imagesTensor - sizeTensor:(MPSGraphTensor* _Nonnull)size - scaleOffsetTensor:(MPSGraphTensor* _Nonnull)scaleOffset - layout:(MPSGraphTensorNamedDataLayout)layout - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)resizeNearestWithGradientTensor:(MPSGraphTensor* _Nonnull)gradient - input:(MPSGraphTensor* _Nonnull)input - nearestRoundingMode:(MPSGraphResizeNearestRoundingMode)nearestRoundingMode - centerResult:(BOOL)centerResult - alignCorners:(BOOL)alignCorners - layout:(MPSGraphTensorNamedDataLayout)layout - 
name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)resizeNearestWithGradientTensor:(MPSGraphTensor* _Nonnull)gradient - input:(MPSGraphTensor* _Nonnull)input - scaleOffsetTensor:(MPSGraphTensor* _Nonnull)scaleOffset - nearestRoundingMode:(MPSGraphResizeNearestRoundingMode)nearestRoundingMode - layout:(MPSGraphTensorNamedDataLayout)layout - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)resizeBilinearWithGradientTensor:(MPSGraphTensor* _Nonnull)gradient - input:(MPSGraphTensor* _Nonnull)input - centerResult:(BOOL)centerResult - alignCorners:(BOOL)alignCorners - layout:(MPSGraphTensorNamedDataLayout)layout - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)resizeBilinearWithGradientTensor:(MPSGraphTensor* _Nonnull)gradient - input:(MPSGraphTensor* _Nonnull)input - scaleOffsetTensor:(MPSGraphTensor* _Nonnull)scaleOffset - layout:(MPSGraphTensorNamedDataLayout)layout - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)sampleGridWithSourceTensor:(MPSGraphTensor* _Nonnull)source - coordinateTensor:(MPSGraphTensor* _Nonnull)coordinates - layout:(MPSGraphTensorNamedDataLayout)layout - normalizeCoordinates:(BOOL)normalizeCoordinates - relativeCoordinates:(BOOL)relativeCoordinates - alignCorners:(BOOL)alignCorners - paddingMode:(MPSGraphPaddingMode)paddingMode - samplingMode:(MPSGraphResizeMode)samplingMode - constantValue:(double)constantValue - name:(NSString* _Nullable)name; - -- (MPSGraphTensor* _Nonnull)sampleGridWithSourceTensor:(MPSGraphTensor* _Nonnull)source - coordinateTensor:(MPSGraphTensor* _Nonnull)coordinates - layout:(MPSGraphTensorNamedDataLayout)layout - normalizeCoordinates:(BOOL)normalizeCoordinates - relativeCoordinates:(BOOL)relativeCoordinates - alignCorners:(BOOL)alignCorners - paddingMode:(MPSGraphPaddingMode)paddingMode - nearestRoundingMode:(MPSGraphResizeNearestRoundingMode)nearestRoundingMode - constantValue:(double)constantValue - name:(NSString* _Nullable)name; -- (MPSGraphTensor* _Nonnull)truncateWithTensor:(MPSGraphTensor* _Nonnull)tensor name:(NSString* _Nullable)name; - -@end diff --git a/aten/src/ATen/native/mps/OperationUtils.mm b/aten/src/ATen/native/mps/OperationUtils.mm index bf3e94207e25b..cae9f5de31092 100644 --- a/aten/src/ATen/native/mps/OperationUtils.mm +++ b/aten/src/ATen/native/mps/OperationUtils.mm @@ -9,8 +9,6 @@ #include #include #include -#include -#include #include #include #include diff --git a/aten/src/ATen/native/mps/operations/BinaryKernel.mm b/aten/src/ATen/native/mps/operations/BinaryKernel.mm index 0b303f48028f4..32b0fff8081ee 100644 --- a/aten/src/ATen/native/mps/operations/BinaryKernel.mm +++ b/aten/src/ATen/native/mps/operations/BinaryKernel.mm @@ -8,8 +8,6 @@ #include #include #include -// For MTLLanguageVersion_3_1 -#include #include #ifndef AT_PER_OPERATOR_HEADERS diff --git a/aten/src/ATen/native/mps/operations/Convolution.mm b/aten/src/ATen/native/mps/operations/Convolution.mm index d572d52d103a1..943e20e993950 100644 --- a/aten/src/ATen/native/mps/operations/Convolution.mm +++ b/aten/src/ATen/native/mps/operations/Convolution.mm @@ -1,24 +1,12 @@ // Copyright © 2022 Apple Inc. 
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS #include -#include #include #include #include #include #include -#if !defined(__MAC_13_2) && (!defined(MAC_OS_X_VERSION_13_2) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_13_2)) - -@implementation FakeMPSGraphConvolution3DOpDescriptor -- (nonnull id)copyWithZone:(nullable NSZone*)zone { - return self; -} - -@end - -#endif - namespace at::native { // Create 3D convolution descriptor @@ -50,11 +38,9 @@ static void fill_conv3d_desc(MPSGraphConvolution3DOpDescriptor* descriptor_, descriptor_.paddingFront = paddingDepth; descriptor_.paddingBack = paddingDepth; - // PyTorch always uses NCDHW memory layout for 3D tensors - descriptor_.dataLayout = (MPSGraphTensorNamedDataLayout)7L; // MPSGraphTensorNamedDataLayoutNCDHW; + descriptor_.dataLayout = MPSGraphTensorNamedDataLayoutNCDHW; - // PyTorch always uses OIDHW memory layout for 3D weights - descriptor_.weightsLayout = (MPSGraphTensorNamedDataLayout)9L; // MPSGraphTensorNamedDataLayoutOIDHW; + descriptor_.weightsLayout = MPSGraphTensorNamedDataLayoutOIDHW; descriptor_.groups = groups; // not yet tested in Xcode/C++ } diff --git a/aten/src/ATen/native/mps/operations/Copy.mm b/aten/src/ATen/native/mps/operations/Copy.mm index 0c121cee8fb62..a3cba05c975cf 100644 --- a/aten/src/ATen/native/mps/operations/Copy.mm +++ b/aten/src/ATen/native/mps/operations/Copy.mm @@ -2,7 +2,6 @@ #define TORCH_ASSERT_ONLY_METHOD_OPERATORS #include #include -#include #include #include #include diff --git a/aten/src/ATen/native/mps/operations/Distributions.mm b/aten/src/ATen/native/mps/operations/Distributions.mm index 4d3f99ea9e02d..85c22c59caf96 100644 --- a/aten/src/ATen/native/mps/operations/Distributions.mm +++ b/aten/src/ATen/native/mps/operations/Distributions.mm @@ -5,8 +5,6 @@ #include #include #include -#include -#include #include #ifndef AT_PER_OPERATOR_HEADERS diff --git a/aten/src/ATen/native/mps/operations/FastFourierTransform.mm b/aten/src/ATen/native/mps/operations/FastFourierTransform.mm index 7e9867c9b948d..9a208e814cfac 100644 --- a/aten/src/ATen/native/mps/operations/FastFourierTransform.mm +++ b/aten/src/ATen/native/mps/operations/FastFourierTransform.mm @@ -1,6 +1,4 @@ #include -#include -#include #include #ifndef AT_PER_OPERATOR_HEADERS @@ -12,20 +10,6 @@ #include #endif -#if !defined(__MAC_14_0) && (!defined(MAC_OS_X_VERSION_14_0) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_14_0)) -@implementation FakeMPSGraphFFTDescriptor -+ (nullable instancetype)descriptor { - // Redispatch the constructor to the actual implementation - id desc = NSClassFromString(@"MPSGraphFFTDescriptor"); - return (FakeMPSGraphFFTDescriptor*)[desc descriptor]; -} - -- (nonnull id)copyWithZone:(nullable NSZone*)zone { - return self; -} -@end -#endif - namespace at::native { namespace { MPSGraphFFTScalingMode normalization_to_ScalingMode(int64_t normalization) { diff --git a/aten/src/ATen/native/mps/operations/GridSampler.mm b/aten/src/ATen/native/mps/operations/GridSampler.mm index ef85633889487..92f2b9c6fbf74 100644 --- a/aten/src/ATen/native/mps/operations/GridSampler.mm +++ b/aten/src/ATen/native/mps/operations/GridSampler.mm @@ -2,7 +2,6 @@ #include #include #include -#include #include #include diff --git a/aten/src/ATen/native/mps/operations/Indexing.mm b/aten/src/ATen/native/mps/operations/Indexing.mm index fa19d2f4d127f..b759eb1373cc6 100644 --- a/aten/src/ATen/native/mps/operations/Indexing.mm +++ b/aten/src/ATen/native/mps/operations/Indexing.mm @@ -17,7 +17,6 @@ #include #include #include -#include #include 
#include #include diff --git a/aten/src/ATen/native/mps/operations/LinearAlgebra.mm b/aten/src/ATen/native/mps/operations/LinearAlgebra.mm index 7a3dde679c05f..66fe8d7b58aa3 100644 --- a/aten/src/ATen/native/mps/operations/LinearAlgebra.mm +++ b/aten/src/ATen/native/mps/operations/LinearAlgebra.mm @@ -6,9 +6,7 @@ #include #include #include -// For MTLLanguageVersion_3_1 #include -#include #include #ifndef AT_PER_OPERATOR_HEADERS diff --git a/aten/src/ATen/native/mps/operations/ReduceOps.mm b/aten/src/ATen/native/mps/operations/ReduceOps.mm index 4ace191b73b88..209c757e38f6d 100644 --- a/aten/src/ATen/native/mps/operations/ReduceOps.mm +++ b/aten/src/ATen/native/mps/operations/ReduceOps.mm @@ -4,7 +4,6 @@ #include #include #include -#include #include #include diff --git a/aten/src/ATen/native/mps/operations/Shape.mm b/aten/src/ATen/native/mps/operations/Shape.mm index 19f26023b3179..0e243c524377e 100644 --- a/aten/src/ATen/native/mps/operations/Shape.mm +++ b/aten/src/ATen/native/mps/operations/Shape.mm @@ -4,7 +4,6 @@ #include #include #include -#include #include #ifndef AT_PER_OPERATOR_HEADERS diff --git a/aten/src/ATen/native/mps/operations/Sort.mm b/aten/src/ATen/native/mps/operations/Sort.mm index 6ff47044df133..b6a07f14704cc 100644 --- a/aten/src/ATen/native/mps/operations/Sort.mm +++ b/aten/src/ATen/native/mps/operations/Sort.mm @@ -5,7 +5,6 @@ #include #include #include -#include #include #ifndef AT_PER_OPERATOR_HEADERS diff --git a/aten/src/ATen/native/mps/operations/UnaryOps.mm b/aten/src/ATen/native/mps/operations/UnaryOps.mm index d7ce40e5cbb4f..1a1e249b3f361 100644 --- a/aten/src/ATen/native/mps/operations/UnaryOps.mm +++ b/aten/src/ATen/native/mps/operations/UnaryOps.mm @@ -2,8 +2,6 @@ #define TORCH_ASSERT_ONLY_METHOD_OPERATORS #include #include -#include -#include #include #ifndef AT_PER_OPERATOR_HEADERS diff --git a/aten/src/ATen/native/mps/operations/Unique.mm b/aten/src/ATen/native/mps/operations/Unique.mm index 05fb733f5c2dd..7c7683caf4286 100644 --- a/aten/src/ATen/native/mps/operations/Unique.mm +++ b/aten/src/ATen/native/mps/operations/Unique.mm @@ -1,7 +1,6 @@ // Copyright © 2022 Apple Inc. #define TORCH_ASSERT_ONLY_METHOD_OPERATORS #include -#include #include #ifndef AT_PER_OPERATOR_HEADERS diff --git a/aten/src/ATen/native/mps/operations/UpSample.mm b/aten/src/ATen/native/mps/operations/UpSample.mm index addc70cf4334d..435af3ce7cf6a 100644 --- a/aten/src/ATen/native/mps/operations/UpSample.mm +++ b/aten/src/ATen/native/mps/operations/UpSample.mm @@ -1,7 +1,6 @@ // Copyright © 2023 Apple Inc. 
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS #include -#include #include #include diff --git a/aten/src/ATen/native/mps/operations/View.mm b/aten/src/ATen/native/mps/operations/View.mm index 5efd4a3cfbdf3..1d373ee1cc2d8 100644 --- a/aten/src/ATen/native/mps/operations/View.mm +++ b/aten/src/ATen/native/mps/operations/View.mm @@ -4,8 +4,6 @@ #include #include #include -// For MTLLanguageVersion_3_1 -#include #include #include From 76e5df3866f11712a3e6306bb0e5055c223c78f3 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sun, 14 Sep 2025 14:41:18 -0700 Subject: [PATCH 236/693] [BE] Use `fmt::format` to define Conv key (#162925) Also use `getArrayRefString` instead of having separate cases for 2D and 3D Conv Pull Request resolved: https://github.com/pytorch/pytorch/pull/162925 Approved by: https://github.com/Skylion007 ghstack dependencies: #162921 --- .../ATen/native/mps/operations/Convolution.mm | 91 ++++++------------- 1 file changed, 26 insertions(+), 65 deletions(-) diff --git a/aten/src/ATen/native/mps/operations/Convolution.mm b/aten/src/ATen/native/mps/operations/Convolution.mm index 943e20e993950..110927379ee53 100644 --- a/aten/src/ATen/native/mps/operations/Convolution.mm +++ b/aten/src/ATen/native/mps/operations/Convolution.mm @@ -6,6 +6,7 @@ #include #include #include +#include namespace at::native { @@ -172,18 +173,6 @@ static Tensor _mps_convolution_impl(const Tensor& input_t_, if (bias_defined) bias_shape = bias_opt.value().sizes(); - std::string mem_format_key; - switch (memory_format) { - case at::MemoryFormat::Contiguous: - mem_format_key = "Contiguous"; - break; - case at::MemoryFormat::ChannelsLast: - mem_format_key = "ChannelsLast"; - break; - default: - assert(0 && "Check should have been done earlier\n"); - } - std::string bias_shape_key; if (bias_defined) { bias_shape_key = std::to_string(bias_shape[0]); @@ -191,20 +180,16 @@ static Tensor _mps_convolution_impl(const Tensor& input_t_, bias_shape_key = "nobias"; } - std::string key; - if (is3DConv) { - key = "mps_3d_convolution:" + std::to_string(stride[0]) + ":" + std::to_string(stride[1]) + ":" + - std::to_string(stride[2]) + ":" + std::to_string(dilation[0]) + ":" + std::to_string(dilation[1]) + ":" + - std::to_string(dilation[2]) + ":" + std::to_string(padding[0]) + ":" + std::to_string(padding[1]) + ":" + - std::to_string(padding[2]) + ":" + std::to_string(groups) + ":" + mem_format_key + - mps::getTensorsStringKey({input_t, weight_t}) + ":" + std::to_string(bias_defined) + ":" + bias_shape_key; - - } else { - key = "mps_convolution:" + std::to_string(stride[0]) + ":" + std::to_string(stride[1]) + ":" + - std::to_string(dilation[0]) + ":" + std::to_string(dilation[1]) + ":" + std::to_string(padding[0]) + ":" + - std::to_string(padding[1]) + ":" + std::to_string(groups) + ":" + mem_format_key + - mps::getTensorsStringKey({input_t, weight_t}) + ":" + std::to_string(bias_defined) + ":" + bias_shape_key; - } + std::string key = fmt::format("mps_{}convolution:{}:{}:{}:{}:{}:{}:{}:{}", + is3DConv ? 
"3d_" : "", + getArrayRefString(stride), + getArrayRefString(dilation), + getArrayRefString(padding), + groups, + is_channels_last, + mps::getTensorsStringKey({input_t, weight_t}), + bias_defined, + bias_shape_key); MPSShape* inputShape = mps::getMPSShape(input_t, memory_format); MPSShape* outputShape = mps::getMPSShape(output_t, memory_format); @@ -386,33 +371,15 @@ static Tensor mps_convolution_backward_input(IntArrayRef input_size, @autoreleasepool { MPSStream* stream = getCurrentMPSStream(); - std::string mem_format_key; - switch (memory_format) { - case at::MemoryFormat::Contiguous: - mem_format_key = "Contiguous"; - break; - case at::MemoryFormat::ChannelsLast: - mem_format_key = "ChannelsLast"; - break; - default: - assert(0 && "Check should have been done earlier\n"); - } - MPSShape* mps_input_shape = getMPSShape(input_size); - std::string key; - if (is3DConv) { - key = "mps_3d_convolution_backward_input:" + std::to_string(stride[0]) + ":" + std::to_string(stride[1]) + ":" + - ":" + std::to_string(stride[2]) + std::to_string(dilation[0]) + ":" + std::to_string(dilation[1]) + ":" + - std::to_string(dilation[2]) + ":" + std::to_string(padding[0]) + ":" + std::to_string(padding[1]) + ":" + - std::to_string(padding[2]) + ":" + std::to_string(groups) + ":" + mem_format_key + - getTensorsStringKey({grad_output_t, weight_t}); - - } else { - key = "mps_convolution_backward_input:" + std::to_string(stride[0]) + ":" + std::to_string(stride[1]) + ":" + - std::to_string(dilation[0]) + ":" + std::to_string(dilation[1]) + ":" + std::to_string(padding[0]) + ":" + - std::to_string(padding[1]) + ":" + std::to_string(groups) + ":" + mem_format_key + - getTensorsStringKey({grad_output_t, weight_t}); - } + std::string key = fmt::format("mps_{}_convolution_backward_input:{}:{}:{}:{}:{}:{}", + is3DConv ? "3d_" : "", + getArrayRefString(stride), + getArrayRefString(dilation), + getArrayRefString(padding), + groups, + is_channels_last, + getTensorsStringKey({grad_output_t, weight_t})); auto cachedGraph = LookUpOrCreateCachedGraph(key, [&](auto mpsGraph, auto newCachedGraph) { auto gradOutputTensor = mpsGraphRankedPlaceHolder(mpsGraph, grad_output_t); auto weightTensor = mpsGraphRankedPlaceHolder(mpsGraph, weight_t); @@ -537,19 +504,13 @@ static Tensor mps_convolution_backward_weights(IntArrayRef weight_size, MPSStream* stream = getCurrentMPSStream(); MPSShape* mps_weight_shape = getMPSShape(weight_size); - std::string key; - if (is3DConv) { - key = "mps_3d_convolution_backward_weights:" + std::to_string(stride[0]) + ":" + std::to_string(stride[1]) + ":" + - std::to_string(stride[2]) + ":" + std::to_string(dilation[0]) + ":" + std::to_string(dilation[1]) + ":" + - std::to_string(dilation[2]) + ":" + std::to_string(padding[0]) + ":" + std::to_string(padding[1]) + ":" + - std::to_string(padding[2]) + ":" + std::to_string(groups) + ":" + - getTensorsStringKey({grad_output_t, input_t, grad_weight_t}); - } else { - key = "mps_convolution_backward_weights:" + std::to_string(stride[0]) + ":" + std::to_string(stride[1]) + ":" + - std::to_string(dilation[0]) + ":" + std::to_string(dilation[1]) + ":" + std::to_string(padding[0]) + ":" + - std::to_string(padding[1]) + ":" + std::to_string(groups) + ":" + - getTensorsStringKey({grad_output_t, input_t, grad_weight_t}); - } + std::string key = fmt::format("mps_{}convolution_backward_weights:{}:{}:{}:{}:{}", + is3DConv ? 
"3d_" : "", + getArrayRefString(stride), + getArrayRefString(dilation), + getArrayRefString(padding), + groups, + getTensorsStringKey({grad_output_t, input_t, grad_weight_t})); auto cachedGraph = LookUpOrCreateCachedGraph(key, [&](auto mpsGraph, auto newCachedGraph) { MPSShape* inputShape = getMPSShape(input_t); bool isDepthwiseConv = From 755cf906727a09385c8570977e8006cea3e584fb Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Sun, 14 Sep 2025 21:49:54 -0400 Subject: [PATCH 237/693] Redirect all use of filesystem to c10/utils/FileSystem.h (#162914) Signed-off-by: Edward Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162914 Approved by: https://github.com/Skylion007, https://github.com/dcci, https://github.com/cyyever --- c10/util/FileSystem.h | 22 +++++++++++++++++++ torch/csrc/distributed/c10d/FileStore.cpp | 4 ++-- .../csrc/distributed/c10d/FlightRecorder.cpp | 5 +++-- .../distributed/c10d/FlightRecorderCuda.cpp | 1 - torch/csrc/distributed/c10d/UCCTracing.cpp | 8 +++---- .../c10d/control_plane/WorkerServer.cpp | 4 ++-- 6 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 c10/util/FileSystem.h diff --git a/c10/util/FileSystem.h b/c10/util/FileSystem.h new file mode 100644 index 0000000000000..bc6dddec66a72 --- /dev/null +++ b/c10/util/FileSystem.h @@ -0,0 +1,22 @@ +// Shim header for filesystem for compilers that are too old to have it not +// in the experimental namespace + +#if __has_include() +#include +#elif __has_include() +#include +#else +#error "Neither nor is available." +#endif + +namespace c10 { + +#if __has_include() +// NOLINTNEXTLINE(misc-unused-alias-decls) +namespace filesystem = std::filesystem; +#elif __has_include() +// NOLINTNEXTLINE(misc-unused-alias-decls) +namespace filesystem = std::experimental::filesystem; +#endif + +} // namespace c10 diff --git a/torch/csrc/distributed/c10d/FileStore.cpp b/torch/csrc/distributed/c10d/FileStore.cpp index 7b0fc862e680d..9fbd86cbad709 100644 --- a/torch/csrc/distributed/c10d/FileStore.cpp +++ b/torch/csrc/distributed/c10d/FileStore.cpp @@ -7,10 +7,10 @@ #include #ifdef _WIN32 +#include #include #include #include -#include #else #include #include @@ -161,7 +161,7 @@ class File { #ifdef _WIN32 // if the parent folder doesn't exist it will never be able to create the // file so we can skip the retry - if (!std::filesystem::exists(std::filesystem::path(path).parent_path())) { + if (!c10::filesystem::exists(c10::filesystem::path(path).parent_path())) { break; } #endif diff --git a/torch/csrc/distributed/c10d/FlightRecorder.cpp b/torch/csrc/distributed/c10d/FlightRecorder.cpp index 2384448a06e75..8f8b993ebe6f4 100644 --- a/torch/csrc/distributed/c10d/FlightRecorder.cpp +++ b/torch/csrc/distributed/c10d/FlightRecorder.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -37,9 +38,9 @@ DebugInfoWriter& DebugInfoWriter::getWriter(int rank) { // Attempt to write to running user's HOME directory cache folder - if it // exists. auto homeDir = getCvarString({"HOME"}, "/tmp"); - auto cacheDirPath = std::filesystem::path(homeDir + "/.cache/torch"); + auto cacheDirPath = c10::filesystem::path(homeDir + "/.cache/torch"); // Create the .cache directory if it doesn't exist - std::filesystem::create_directories(cacheDirPath); + c10::filesystem::create_directories(cacheDirPath); auto defaultLocation = cacheDirPath / "comm_lib_trace_rank_"; // For internal bc compatibility, we keep the old the ENV check. 
diff --git a/torch/csrc/distributed/c10d/FlightRecorderCuda.cpp b/torch/csrc/distributed/c10d/FlightRecorderCuda.cpp index 25ac1279d62e9..1673e7a892e57 100644 --- a/torch/csrc/distributed/c10d/FlightRecorderCuda.cpp +++ b/torch/csrc/distributed/c10d/FlightRecorderCuda.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include #include diff --git a/torch/csrc/distributed/c10d/UCCTracing.cpp b/torch/csrc/distributed/c10d/UCCTracing.cpp index be4f2d3dfffec..66d62d662c259 100644 --- a/torch/csrc/distributed/c10d/UCCTracing.cpp +++ b/torch/csrc/distributed/c10d/UCCTracing.cpp @@ -1,5 +1,6 @@ #ifdef USE_C10D_UCC +#include #include #include #include @@ -10,7 +11,6 @@ #include #include #include -#include #include namespace c10d { @@ -34,15 +34,15 @@ void ProcessGroupUCCLogger::flushComms(int rank, int world_size) { "_", (1 + ltm->tm_mon), "_", ltm->tm_mday, "_", (1900 + ltm->tm_year)); } - std::filesystem::path fullpath = std::filesystem::path("/tmp") / dirname; + c10::filesystem::path fullpath = c10::filesystem::path("/tmp") / dirname; auto user_path = c10::utils::get_env("TORCH_UCC_COMMS_TRACE_OUTPUT_DIR"); if (user_path.has_value()) { fullpath = std::move(user_path.value()); } - std::filesystem::path trace_filename = + c10::filesystem::path trace_filename = fullpath / fmt::format("rank{}.json", rank); std::error_code ec{}; - if (!std::filesystem::create_directories(fullpath, ec)) { + if (!c10::filesystem::create_directories(fullpath, ec)) { LOG(INFO) << getLogPrefix() << "[INFO] failed to mkdir " << fullpath << " with error " << ec.message(); return; diff --git a/torch/csrc/distributed/c10d/control_plane/WorkerServer.cpp b/torch/csrc/distributed/c10d/control_plane/WorkerServer.cpp index 41dbd7391452f..a9a7722fe41f8 100644 --- a/torch/csrc/distributed/c10d/control_plane/WorkerServer.cpp +++ b/torch/csrc/distributed/c10d/control_plane/WorkerServer.cpp @@ -1,8 +1,8 @@ -#include #include #include #include +#include #include #include #include @@ -145,7 +145,7 @@ WorkerServer::WorkerServer(const std::string& hostOrFile, int port) { // using unix sockets server_.set_address_family(AF_UNIX); - if (std::filesystem::exists(hostOrFile)) { + if (c10::filesystem::exists(hostOrFile)) { throw std::runtime_error(fmt::format("{} already exists", hostOrFile)); } From b3ad8f4a9c56fd7967a18dfa2234a63d641fc9fe Mon Sep 17 00:00:00 2001 From: can-gaa-hou Date: Mon, 15 Sep 2025 05:44:15 +0000 Subject: [PATCH 238/693] [BUG] Fix nonzero_static crash on CUDA when the input is a empty tensor (#162578) Fixes #162473 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162578 Approved by: https://github.com/ngimel --- aten/src/ATen/native/cuda/Nonzero.cu | 11 +++++++++++ test/test_unary_ufuncs.py | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/aten/src/ATen/native/cuda/Nonzero.cu b/aten/src/ATen/native/cuda/Nonzero.cu index 2d0e32d4e8c05..aa1291dc7842d 100644 --- a/aten/src/ATen/native/cuda/Nonzero.cu +++ b/aten/src/ATen/native/cuda/Nonzero.cu @@ -317,6 +317,17 @@ void nonzero_static_cuda_out_impl( out_temp = Tensor(at::detail::empty_cuda({self.dim(), size}, out.options())).t(); } + // If input has zero elements, avoid kernel grid calculations (which can + // produce zero divisors) and just fill the output with fill_value. + if (self.numel() == 0) { + if (need_to_copy) { + out_temp.fill_(fill_value); + out.copy_(out_temp); + } else { + out.fill_(fill_value); + } + return; + } int64_t* out_data_ptr = need_to_copy ? 
out_temp.mutable_data_ptr() : out.mutable_data_ptr(); diff --git a/test/test_unary_ufuncs.py b/test/test_unary_ufuncs.py index 9939e8e76ce94..15b967e570740 100644 --- a/test/test_unary_ufuncs.py +++ b/test/test_unary_ufuncs.py @@ -1654,6 +1654,15 @@ def test_nonzero_static(self, device): ), ) + # empty input + # https://github.com/pytorch/pytorch/issues/162473 + input_tensor = torch.tensor([], device=device) + static_size = 1 + self.assertEqual( + torch.nonzero_static(input_tensor, size=static_size), + torch.tensor([[-1]], device=device), + ) + # 1D input input_tensor = torch.tensor([0, 8], device=device) static_size = 1 From 06bb32d55eec4571668cac4f89d2492018054c79 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Sun, 14 Sep 2025 22:13:13 -0400 Subject: [PATCH 239/693] Skip empty tests, they don't make sense for numerics (#162932) Signed-off-by: Edward Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162932 Approved by: https://github.com/dcci --- test/distributed/tensor/test_dtensor_ops.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/distributed/tensor/test_dtensor_ops.py b/test/distributed/tensor/test_dtensor_ops.py index 8c650f6b0ce02..6f981aee82ef0 100644 --- a/test/distributed/tensor/test_dtensor_ops.py +++ b/test/distributed/tensor/test_dtensor_ops.py @@ -126,10 +126,6 @@ def wrapped(fn): xfail("cummin"), xfail("diagonal_scatter"), xfail("dist"), - xfail("empty"), - xfail("empty_strided"), - xfail("empty_like"), - xfail("empty_permuted"), xfail("expand_copy"), xfail("exponential"), xfail("equal"), @@ -482,6 +478,11 @@ def wrapped(fn): skip("_segment_reduce", "offsets"), # TODO: fix the following ops skip("squeeze"), + # These must be skipped as their contents are nondeterministic + skip("empty"), + skip("empty_strided"), + skip("empty_like"), + skip("empty_permuted"), } From 814ba34fa61f4d95affa6ef9f7207cd3b45cbb75 Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Mon, 15 Sep 2025 06:24:55 +0000 Subject: [PATCH 240/693] [2/N] Port 5 _composable distributed test to Intel GPU (#159241) For https://github.com/pytorch/pytorch/issues/114850, we will port distributed tests to Intel GPU. This is the second PR for _composable cases, the first is https://github.com/pytorch/pytorch/pull/159118. 
We could enable Intel GPU with following methods and try the best to keep the original code styles: - Use "torch.accelerator.current_accelerator()" to determine the accelerator backend - Enabled XPU for some test path - Skip some test cases which Intel GPU does not support - Added "cpu:gloo,xpu:xccl" for distributed backend Pull Request resolved: https://github.com/pytorch/pytorch/pull/159241 Approved by: https://github.com/guangyey, https://github.com/d4l3k --- .../_composable/test_checkpoint.py | 21 ++-- .../test_2d_composability.py | 97 +++++++++++-------- .../test_pp_composability.py | 35 ++++--- .../distributed/_composable/test_replicate.py | 39 +++++--- .../test_replicate_with_compiler.py | 2 + .../distributed/_tensor/common_dtensor.py | 1 + 6 files changed, 120 insertions(+), 75 deletions(-) diff --git a/test/distributed/_composable/test_checkpoint.py b/test/distributed/_composable/test_checkpoint.py index f30f8c34f6137..7834328f1e359 100644 --- a/test/distributed/_composable/test_checkpoint.py +++ b/test/distributed/_composable/test_checkpoint.py @@ -10,10 +10,13 @@ import torch.nn as nn from torch.distributed._composable import checkpoint from torch.testing._internal.common_cuda import TEST_CUDA -from torch.testing._internal.common_utils import run_tests, TestCase +from torch.testing._internal.common_utils import run_tests, TEST_XPU, TestCase from torch.utils.checkpoint import CheckpointError +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + + class MemoryDelta(ContextDecorator): def __init__(self, device: torch.device): self.device: torch.device = device @@ -22,16 +25,16 @@ def __init__(self, device: torch.device): def __enter__(self): self.active_memory_enter = ( - torch.cuda.memory_stats()["active_bytes.all.current"] - if self.device.type == "cuda" + torch.accelerator.memory_stats()["active_bytes.all.current"] + if self.device.type == "cuda" or self.device.type == "xpu" else 0 ) return self def __exit__(self, *exc): self.active_memory_exit = ( - torch.cuda.memory_stats()["active_bytes.all.current"] - if self.device.type == "cuda" + torch.accelerator.memory_stats()["active_bytes.all.current"] + if self.device.type == "cuda" or self.device.type == "xpu" else 0 ) @@ -126,7 +129,7 @@ def _test_tensor_only( loss2 = net2(x2).sum() loss2.backward() - if x.is_cuda: + if x.is_cuda or x.is_xpu: self.assertTrue(mem2.delta() < mem1.delta()) for p1, p2 in zip(net1.parameters(), net2.parameters()): @@ -137,10 +140,10 @@ def test_tensor_only_cpu(self): net = ToyModel() self._test_tensor_only(net, x) - @unittest.skipIf(not TEST_CUDA, "no cuda") + @unittest.skipIf(not TEST_CUDA and not TEST_XPU, "no cuda/xpu") def test_tensor_only_gpu(self): - x = torch.randn(20, 100, device="cuda:0") - net = ToyModel().to("cuda:0") + x = torch.randn(20, 100, device=f"{device_type}:0") + net = ToyModel().to(f"{device_type}:0") self._test_tensor_only(net, x) def test_random_cpu(self): diff --git a/test/distributed/_composable/test_composability/test_2d_composability.py b/test/distributed/_composable/test_composability/test_2d_composability.py index bcaf06ea947a0..3fd84fbe9e739 100644 --- a/test/distributed/_composable/test_composability/test_2d_composability.py +++ b/test/distributed/_composable/test_composability/test_2d_composability.py @@ -47,6 +47,8 @@ instantiate_parametrized_tests, parametrize, run_tests, + TEST_XPU, + xfailIf, ) from torch.testing._internal.distributed._tensor.common_dtensor import ( DTensorTestBase, @@ -58,6 +60,9 @@ from 
torch.testing._internal.distributed.checkpoint_utils import with_temp_dir +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" + + class SimpleModel(nn.Module): def __init__(self): super().__init__() @@ -73,7 +78,7 @@ def forward(self, x): return x def get_input(self): - return torch.rand(4, 5, device="cuda") + return torch.rand(4, 5, device=device_type) class SimpleModelUneven(nn.Module): @@ -94,7 +99,7 @@ def forward(self, x): return x def get_input(self): - return torch.rand(4, 5, device="cuda") + return torch.rand(4, 5, device=device_type) class TestFullyShard2DTraining(FSDPTest): @@ -105,13 +110,15 @@ class TestFullyShard2DTraining(FSDPTest): @property def world_size(self) -> int: - return min(4, torch.cuda.device_count()) + return min(4, torch.accelerator.device_count()) def init_global_mesh(self) -> DeviceMesh: # Prefer to test with >=4 GPUs, but for 2 GPUs, use 2-way TP dp_size = 2 if self.world_size > 2 else 1 return init_device_mesh( - "cuda", (dp_size, self.world_size // dp_size), mesh_dim_names=("dp", "tp") + device_type, + (dp_size, self.world_size // dp_size), + mesh_dim_names=("dp", "tp"), ) @skip_if_lt_x_gpu(2) @@ -138,7 +145,7 @@ def _test_train_parity_2d_mlp( torch.manual_seed(42) model = MLPStack(mlp_dim) - ref_model = copy.deepcopy(model).cuda() + ref_model = copy.deepcopy(model).to(device_type) replicate(ref_model, device_ids=[self.rank], process_group=dp_pg) ref_optim = torch.optim.Adam(ref_model.parameters(), lr=1e-2, foreach=False) model.parallelize( @@ -150,9 +157,8 @@ def _test_train_parity_2d_mlp( optim = torch.optim.Adam(model.parameters(), lr=1e-2, foreach=False) torch.manual_seed(42 + dp_pg.rank() + 1) - device = torch.device("cuda") for iter_idx in range(10): - inp = torch.randn((8, mlp_dim), device=device) + inp = torch.randn((8, mlp_dim), device=device_type) losses: list[torch.Tensor] = [] for _model, _optim in ((ref_model, ref_optim), (model, optim)): _optim.zero_grad(set_to_none=(iter_idx % 2 == 0)) @@ -162,6 +168,7 @@ def _test_train_parity_2d_mlp( self.assertEqual(losses[0], losses[1]) @skip_if_lt_x_gpu(2) + @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1881 def test_train_parity_2d_transformer(self): self.run_subtests( {"use_shard_placement_fn": [False, True]}, @@ -172,12 +179,12 @@ def _test_train_parity_2d_transformer(self, use_shard_placement_fn: bool): torch.manual_seed(42) model_args = ModelArgs(n_layers=3, dropout_p=0.0) model = Transformer(model_args) - ref_model = copy.deepcopy(model).cuda() + ref_model = copy.deepcopy(model).to(device_type) ref_optim = torch.optim.AdamW(ref_model.parameters(), lr=1e-2) dp_size, tp_size = self.world_size // 2, 2 global_mesh = init_device_mesh( - "cuda", (dp_size, tp_size), mesh_dim_names=("dp", "tp") + device_type, (dp_size, tp_size), mesh_dim_names=("dp", "tp") ) model = Transformer.parallelize(model, global_mesh["tp"], use_seq_parallel=True) @@ -205,7 +212,7 @@ def _shard_placement_fn(param: nn.Parameter) -> Optional[Shard]: self.assertEqual(full_param, ref_param) torch.manual_seed(42 + global_mesh.get_local_rank("dp")) - inp = torch.randint(0, model_args.vocab_size, (2, 16), device="cuda") + inp = torch.randint(0, model_args.vocab_size, (2, 16), device=device_type) for iter_idx in range(5): ref_loss = ref_model(inp).sum() loss = model(inp).sum() @@ -242,15 +249,16 @@ def _shard_placement_fn(param: nn.Parameter) -> Optional[Shard]: self.assertEqual(full_param, ref_param) @skip_if_lt_x_gpu(2) + @xfailIf(TEST_XPU) # 
https://github.com/pytorch/pytorch/issues/156782 def test_tp_with_fsdp_offloading(self): global_mesh = init_device_mesh( - "cuda", (1, self.world_size), mesh_dim_names=("dp", "tp") + device_type, (1, self.world_size), mesh_dim_names=("dp", "tp") ) dp_mesh, tp_mesh = global_mesh["dp"], global_mesh["tp"] torch.manual_seed(42) mlp_dim = 16 model = MLPStack(mlp_dim) - ref_model = copy.deepcopy(model).cuda() + ref_model = copy.deepcopy(model).to(device_type) ref_optim = torch.optim.Adam(ref_model.parameters(), lr=1e-2, foreach=False) # Parallelize with N-way TP and 1-way FSDP model.parallelize( @@ -268,7 +276,7 @@ def test_tp_with_fsdp_offloading(self): # NOTE: We still see the FSDP all-gather/reduce-scatter c10d ops # called, but they will just be no-ops without issuing any kernels. # We prefer to keep the no-op check at the c10d level, not in FSDP. - inp = torch.randn((4, mlp_dim), device="cuda") # same on all ranks + inp = torch.randn((4, mlp_dim), device=device_type) # same on all ranks for _ in range(10): ref_optim.zero_grad() optim.zero_grad() @@ -297,6 +305,7 @@ def test_tp_with_fsdp_offloading(self): ref_optim.step() @skip_if_lt_x_gpu(2) + @xfailIf(TEST_XPU) # https://github.com/intel/torch-xpu-ops/issues/1881 @with_temp_dir def test_train_parity_2d_transformer_checkpoint_resume(self): """ @@ -352,7 +361,7 @@ def parallelize(_model: Transformer, mesh: DeviceMesh, use_seq_parallel: bool): ) torch.manual_seed(42 + global_mesh["dp"].get_local_rank() + 1) - inp = torch.randint(0, model_args.vocab_size, (3, 16), device="cuda") + inp = torch.randint(0, model_args.vocab_size, (3, 16), device=device_type) loss_no_cp1 = train_step(model_no_cp, optim_no_cp, inp) loss_no_cp2 = train_step(model_no_cp, optim_no_cp, inp) @@ -410,14 +419,14 @@ class TestFullyShard2DStateDict(DTensorTestBase): @property def backend(self): # need to specify gloo backend for testing cpu offload - return "cpu:gloo,cuda:nccl" + return "cpu:gloo,xpu:xccl" if TEST_XPU else "cpu:gloo,cuda:nccl" @with_comms @skip_if_lt_x_gpu(4) def test_fully_shard_tp_2d_set_full_state_dict(self): - dummy_model = SimpleModel().cuda() + dummy_model = SimpleModel().to(device_type) mesh_2d = init_device_mesh( - "cuda", + device_type, (2, self.world_size // 2), mesh_dim_names=("dp", "tp"), ) @@ -561,7 +570,7 @@ def test_2d_fsdp_state_enable_extension(self): self.device_type, (2, self.world_size // 2), mesh_dim_names=("dp", "tp") ) model = FSDP( - SimpleModel().cuda(), + SimpleModel().to(device_type), device_mesh=mesh_2d["dp"], ) fsdp_state = _get_module_fsdp_state(model) @@ -573,7 +582,7 @@ def _test_2d_e2e_training( recompute_activation=False, ) -> None: torch.manual_seed(0) - model = SimpleModel().cuda(self.rank) + model = SimpleModel().to(f"{device_type}:{self.rank}") model = FSDP(model, use_orig_params=use_orig_params) optim = torch.optim.Adam(model.parameters(), lr=0.01) @@ -587,7 +596,9 @@ def _test_2d_e2e_training( "net1": ColwiseParallel(), "net2": RowwiseParallel(), } - model_2d = parallelize_module(SimpleModel().cuda(), tp_mesh, parallelize_plan) + model_2d = parallelize_module( + SimpleModel().to(device_type), tp_mesh, parallelize_plan + ) model_2d = FSDP( model_2d, device_mesh=dp_mesh, @@ -615,7 +626,7 @@ def _test_2d_e2e_training( # Ensure all input across TP ranks are same. # TODO: add a get_group_rank() to DeviceMesh. 
torch.manual_seed(i + dist.get_rank(dp_mesh.get_group(mesh_dim=0))) - input = torch.rand(4, 5).cuda(self.rank) + input = torch.rand(4, 5).to(f"{device_type}:{self.rank}") output = model(input) output_2d = model_2d(input) self.assertEqual(output, output_2d) @@ -652,7 +663,7 @@ class TestNew2dParallelStateDict(DTensorTestBase): @property def backend(self): # need to specify gloo backend for testing cpu offload - return "cpu:gloo,cuda:nccl" + return "cpu:gloo,xpu:xccl" if TEST_XPU else "cpu:gloo,cuda:nccl" @with_comms @skip_if_lt_x_gpu(4) @@ -669,7 +680,7 @@ def test_fsdp_2d_extension(self): "net3": ColwiseParallel(), } model_2d = parallelize_module( - SimpleModel().cuda(), + SimpleModel().to(device_type), mesh_2d["tp"], parallelize_plan=parallelize_plan, ) @@ -679,8 +690,10 @@ def test_fsdp_2d_extension(self): isinstance(model_2d_fsdp_state._fsdp_extension, DTensorExtensions) ) - mesh_1d = init_device_mesh("cuda", (self.world_size,)) - model_1d = FSDP(SimpleModel().cuda(), device_mesh=mesh_1d, use_orig_params=True) + mesh_1d = init_device_mesh(device_type, (self.world_size,)) + model_1d = FSDP( + SimpleModel().to(device_type), device_mesh=mesh_1d, use_orig_params=True + ) model_1d_fsdp_state = _get_module_fsdp_state(model_1d) self.assertEqual(model_1d_fsdp_state._fsdp_extension, None) @@ -692,7 +705,7 @@ def test_2d_state_dict(self, is_even_sharded_model): # Create a model without wrapper torch.manual_seed(0) - no_wrap_model = simple_model().cuda(self.rank) + no_wrap_model = simple_model().to(f"{device_type}:{self.rank}") no_wrap_state_dict = no_wrap_model.state_dict() # Create a model and sharded it with 2D FSDP + TP @@ -706,7 +719,9 @@ def test_2d_state_dict(self, is_even_sharded_model): "net1": ColwiseParallel(), "net2": RowwiseParallel(), } - model_2d = parallelize_module(simple_model().cuda(), tp_mesh, parallelize_plan) + model_2d = parallelize_module( + simple_model().to(device_type), tp_mesh, parallelize_plan + ) model_2d = FSDP(model_2d, device_mesh=dp_mesh, use_orig_params=True) FSDP.set_state_dict_type( @@ -754,7 +769,9 @@ def test_2d_load_state_dict(self, is_even_sharded_model): "net1": ColwiseParallel(), "net2": RowwiseParallel(), } - model_2d = parallelize_module(simple_model().cuda(), tp_mesh, parallelize_plan) + model_2d = parallelize_module( + simple_model().to(device_type), tp_mesh, parallelize_plan + ) model_2d = FSDP(model_2d, device_mesh=dp_mesh, use_orig_params=True) optim_2d = torch.optim.Adam(model_2d.parameters(), lr=0.01) @@ -768,7 +785,7 @@ def test_2d_load_state_dict(self, is_even_sharded_model): ref_state_dict = deepcopy(model_2d.state_dict()) # Update the parameters so model.state_dict() will be different from ref_dtensor_sd. - model_2d(model_2d.get_input().cuda(self.rank)).sum().backward() + model_2d(model_2d.get_input().to(f"{device_type}:{self.rank}")).sum().backward() optim_2d.step() # Load ref_state_dict back. 
@@ -799,9 +816,11 @@ def test_2d_optim_state_dict(self, is_even_sharded_model): # Create a model without wrapper torch.manual_seed(0) - no_wrap_model = simple_model().cuda(self.rank) + no_wrap_model = simple_model().to(f"{device_type}:{self.rank}") no_wrap_optim = torch.optim.Adam(no_wrap_model.parameters(), lr=0.01) - no_wrap_model(no_wrap_model.get_input().cuda(self.rank)).sum().backward() + no_wrap_model( + no_wrap_model.get_input().to(f"{device_type}:{self.rank}") + ).sum().backward() no_wrap_optim.step() no_wrap_osd = get_optimizer_state_dict(no_wrap_model, optimizers=no_wrap_optim) @@ -815,7 +834,7 @@ def test_2d_optim_state_dict(self, is_even_sharded_model): "net2": RowwiseParallel(), } model_2d = parallelize_module( - simple_model().cuda(), mesh_2d["tp"], parallelize_plan + simple_model().to(device_type), mesh_2d["tp"], parallelize_plan ) model_2d = FSDP(model_2d, device_mesh=mesh_2d["dp"], use_orig_params=True) FSDP.set_state_dict_type( @@ -823,7 +842,7 @@ def test_2d_optim_state_dict(self, is_even_sharded_model): StateDictType.SHARDED_STATE_DICT, ) optim_2d = torch.optim.Adam(model_2d.parameters(), lr=0.01) - model_2d(model_2d.get_input().cuda(self.rank)).sum().backward() + model_2d(model_2d.get_input().to(f"{device_type}:{self.rank}")).sum().backward() optim_2d.step() optim_2d_osd = get_optimizer_state_dict(model_2d, optimizers=optim_2d) ref_optim_2d_osd = deepcopy(optim_2d_osd) @@ -842,7 +861,7 @@ def test_2d_optim_state_dict(self, is_even_sharded_model): # compare with no_wrap state. if isinstance(dist_state, DTensor): dist_state = ( - dist_state.cuda() + dist_state.to(device_type) .redistribute(placements=(Replicate(), Replicate())) .to_local() ) @@ -850,7 +869,7 @@ def test_2d_optim_state_dict(self, is_even_sharded_model): self.assertTrue(torch.allclose(state, dist_state)) # Update the parameters 2d optim states will be different from ref_optim_state_dict. 
- model_2d(model_2d.get_input().cuda(self.rank)).sum().backward() + model_2d(model_2d.get_input().to(f"{device_type}:{self.rank}")).sum().backward() optim_2d.step() set_optimizer_state_dict( @@ -892,8 +911,8 @@ def test_fsdp1_tp_2d_set_full_state_dict(self): 5) dcp.load the state dict from storage 6) load the state dict into the 2D model """ - dummy_model = SimpleModel().cuda() - mesh_1d = init_device_mesh("cuda", (self.world_size,)) + dummy_model = SimpleModel().to(device_type) + mesh_1d = init_device_mesh(device_type, (self.world_size,)) model = FSDP(dummy_model, device_mesh=mesh_1d) optim = torch.optim.Adam(model.parameters(), lr=0.01) model(model.get_input()).sum().backward() @@ -911,9 +930,9 @@ def test_fsdp1_tp_2d_set_full_state_dict(self): dcp.save(state_dict, checkpoint_id=self.temp_dir) # initialize 2d model - dummy_model = SimpleModel().cuda() + dummy_model = SimpleModel().to(device_type) mesh_2d = init_device_mesh( - "cuda", + device_type, (2, self.world_size // 2), mesh_dim_names=("dp", "tp"), ) diff --git a/test/distributed/_composable/test_composability/test_pp_composability.py b/test/distributed/_composable/test_composability/test_pp_composability.py index 8f0b938da41b0..e4daa81c456c0 100644 --- a/test/distributed/_composable/test_composability/test_pp_composability.py +++ b/test/distributed/_composable/test_composability/test_pp_composability.py @@ -30,7 +30,7 @@ from torch.testing._internal.common_cuda import TEST_MULTIGPU from torch.testing._internal.common_distributed import ( MultiProcessTestCase, - requires_nccl, + requires_accelerator_dist_backend, skip_if_lt_x_gpu, ) from torch.testing._internal.common_utils import ( @@ -38,6 +38,7 @@ parametrize, run_tests, skip_but_pass_in_sandcastle_if, + TEST_XPU, ) from torch.testing._internal.distributed.checkpoint_utils import with_temp_dir @@ -46,6 +47,10 @@ from torch.distributed.checkpoint.metadata import STATE_DICT_TYPE +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" +backend = torch.distributed.get_default_backend_for_device(device_type) + + # MLP Layer class MLPModule(torch.nn.Module): def __init__(self, d_hid: int): @@ -79,7 +84,7 @@ class ComposabilityTest(MultiProcessTestCase): @classmethod def backend_str(cls) -> str: # Testing with NCCL backend - return "nccl" + return backend def setUp(self): super().setUp() @@ -100,9 +105,11 @@ def world_size(self): def device(self): return self.rank - @requires_nccl() + @requires_accelerator_dist_backend(["nccl", "xccl"]) @skip_if_lt_x_gpu(4) - @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "Test requires 4+ GPUs") + @skip_but_pass_in_sandcastle_if( + not TEST_MULTIGPU and not TEST_XPU, "Test requires 4+ GPUs" + ) def test_pp_and_dcp(self): """ Test that pipeline parallelism and distributed checkpointing can be used together and @@ -143,11 +150,11 @@ def forward(self, x): x = layer(x) return x - device = torch.device("cuda", self.device) - torch.cuda.set_device(self.device) + device = torch.device(device_type, self.device) + torch.accelerator.set_device_index(self.device) store = torch.distributed.FileStore(self.file_name, self.world_size) torch.distributed.init_process_group( - backend="nccl", + backend=backend, store=store, rank=self.rank, world_size=self.world_size, @@ -192,9 +199,11 @@ def _dcp_test(self): _dcp_test(self) - @requires_nccl() + @requires_accelerator_dist_backend(["nccl", "xccl"]) @skip_if_lt_x_gpu(8) - @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "Test requires 8+ GPUs") + @skip_but_pass_in_sandcastle_if( + not 
TEST_MULTIGPU and not TEST_XPU, "Test requires 8+ GPUs" + ) @parametrize( "ScheduleClass", [ @@ -213,11 +222,11 @@ def _dcp_test(self): ], ) def test_3d_with_tp_dp_pp(self, ScheduleClass, MixedPrecisionParam): - _device_raii = torch.device("cuda", self.device) - torch.cuda.set_device(self.device) + _device_raii = torch.device(device_type, self.device) + torch.accelerator.set_device_index(self.device) store = torch.distributed.FileStore(self.file_name, self.world_size) torch.distributed.init_process_group( - backend="nccl", + backend=backend, store=store, rank=self.rank, world_size=self.world_size, @@ -228,7 +237,7 @@ def test_3d_with_tp_dp_pp(self, ScheduleClass, MixedPrecisionParam): num_microbatches = 8 dp_size = self.world_size // (tp_size * pp_size) device_mesh = init_device_mesh( - "cuda", + device_type, mesh_shape=(dp_size, pp_size, tp_size), mesh_dim_names=("dp", "pp", "tp"), ) diff --git a/test/distributed/_composable/test_replicate.py b/test/distributed/_composable/test_replicate.py index a793fe2fed4cc..8c1cb3d5df32b 100644 --- a/test/distributed/_composable/test_replicate.py +++ b/test/distributed/_composable/test_replicate.py @@ -1,6 +1,7 @@ # Owner(s): ["oncall: distributed"] import os +import unittest from copy import deepcopy import torch @@ -14,7 +15,11 @@ MultiProcessTestCase, skip_if_lt_x_gpu, ) -from torch.testing._internal.common_utils import run_tests +from torch.testing._internal.common_utils import run_tests, TEST_XPU + + +device_type = acc.type if (acc := torch.accelerator.current_accelerator()) else "cpu" +device_module = torch.get_device_module(device_type) class Net(nn.Module): @@ -154,6 +159,7 @@ def test_replicate_single_module(self): self._compare_module(model, replicate_model) @skip_if_lt_x_gpu(2) + @unittest.skipIf(TEST_XPU, "XPU does not support gloo backend") def test_replicate_move_args_kwargs_to_device(self): class MyNet(nn.Module): def __init__(self) -> None: @@ -166,24 +172,25 @@ def forward(self, inp, *, kwarg=None): return self.a(inp) self._init_pg() - torch.cuda.set_device(self.rank) - model = MyNet().cuda() - replicate(model, device_id=torch.cuda.current_device()) + torch.accelerator.set_device_index(self.rank) + model = MyNet().to(device_type) + replicate(model, device_id=torch.accelerator.current_device_index()) # CPU input ensures replicate can move arg and kwargs to device. a, b = torch.randn(2, 2), torch.randn(2, 2) model(a, kwarg=b).sum().backward() @skip_if_lt_x_gpu(2) + @unittest.skipIf(TEST_XPU, "XPU does not support gloo backend") def test_replicate_ignore_module(self): self._init_pg() - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) # Seed ensures diff input and thus different local grads across ranks. torch.manual_seed(self.rank) - torch.cuda.manual_seed(self.rank) - model = Net().cuda() + device_module.manual_seed(self.rank) + model = Net().to(device_type) replicate(model, ignored_modules=[model.fc1]) # CPU input ensures that replicate can move input to GPU as DDP does. - inp = torch.randn(5, 2, device="cuda") * (self.rank + 1) + inp = torch.randn(5, 2, device=device_type) * (self.rank + 1) out = model(inp) * 10 out.sum().backward() # FC1 grads should not be synchronized, FC2 and 3 should be. 
@@ -221,10 +228,11 @@ def test_replicate_with_kwargs(self): self._compare_module(model, replicate_model) @skip_if_lt_x_gpu(2) + @unittest.skipIf(TEST_XPU, "XPU does not support gloo backend") def test_replicate_device_id(self): self._init_pg() model = Net() - model_cuda = deepcopy(model).cuda() + model_cuda = deepcopy(model).to(device_type) model_cuda2 = deepcopy(model_cuda) replicate(model, device_id=torch.device("cpu")) # DDP instance is attached in first pre forward @@ -233,13 +241,15 @@ def test_replicate_device_id(self): # Should be None for CPU training self.assertEqual(None, replicate_ddp_weakref.device_ids) - replicate(model_cuda, device_id=torch.device(torch.cuda.current_device())) + replicate( + model_cuda, device_id=torch.device(torch.accelerator.current_device_index()) + ) # DDP instance is attached in first pre forward model_cuda(torch.randn(2, 2)) replicate_ddp_weakref = replicate.state(model_cuda)._ddp_weakref() self.assertEqual([0], replicate_ddp_weakref.device_ids) # Pass in int as device_id - replicate(model_cuda2, device_id=int(torch.cuda.current_device())) + replicate(model_cuda2, device_id=int(torch.accelerator.current_device_index())) # DDP instance is attached in first pre forward model_cuda2(torch.randn(2, 2)) replicate_ddp_weakref = replicate.state(model_cuda2)._ddp_weakref() @@ -256,6 +266,7 @@ def test_replicate_wrong_device_id_type(self): class ReplicateFullyShardInit(ReplicateTest): @skip_if_lt_x_gpu(2) + @unittest.skipIf(TEST_XPU, "XPU does not support gloo backend") def test_replicate_fully_shard_init(self): class ToyModel(nn.Module): def __init__(self, dim: int): @@ -273,14 +284,14 @@ def forward(self, x: torch.Tensor): return y self._init_pg() - torch.cuda.set_device(self.rank) + torch.accelerator.set_device_index(self.rank) dim = 3 bz = 2 - model = ToyModel(dim).cuda() + model = ToyModel(dim).to(device_type) for linear in model.linears: fully_shard(linear) fully_shard(model.linears) - replicate(model, device_id=torch.cuda.current_device()) + replicate(model, device_id=torch.accelerator.current_device_index()) for linear in model.linears: self.assertTrue(isinstance(linear.weight, DTensor)) inp = torch.rand(bz, dim) diff --git a/test/distributed/_composable/test_replicate_with_compiler.py b/test/distributed/_composable/test_replicate_with_compiler.py index 11eba3e5bb0c9..291b3a4268223 100644 --- a/test/distributed/_composable/test_replicate_with_compiler.py +++ b/test/distributed/_composable/test_replicate_with_compiler.py @@ -98,6 +98,8 @@ def _test_compile( self.create_pg(device) torch._dynamo.config.optimize_ddp = "python_reducer" torch.manual_seed(123) + if device_type == "xpu": + torch.use_deterministic_algorithms(True, warn_only=True) model = Net(checkpoint=checkpoint).to(device) input = torch.randn([1, DIM], device=device) diff --git a/torch/testing/_internal/distributed/_tensor/common_dtensor.py b/torch/testing/_internal/distributed/_tensor/common_dtensor.py index e25e08fbf5090..604ba9714f21d 100644 --- a/torch/testing/_internal/distributed/_tensor/common_dtensor.py +++ b/torch/testing/_internal/distributed/_tensor/common_dtensor.py @@ -388,6 +388,7 @@ def init_pg(self, eager_init, backend: Optional[str] = None) -> None: "hccl", "xccl", "fake", + "cpu:gloo,xpu:xccl", ]: raise RuntimeError(f"Backend {backend} not supported!") From bf6b40da3e3be7718b8ddc94eed2da8cabaa5e86 Mon Sep 17 00:00:00 2001 From: Natalia Gimelshein Date: Mon, 15 Sep 2025 06:50:00 +0000 Subject: [PATCH 241/693] fix deterministic scatter_add path for multi-d tensors (#162866) 
Previously, `select` did not work correctly for tensors with more than two dimensions. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162866 Approved by: https://github.com/valentinandrei --- aten/src/ATen/native/TensorAdvancedIndexing.cpp | 2 +- test/test_scatter_gather_ops.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/aten/src/ATen/native/TensorAdvancedIndexing.cpp b/aten/src/ATen/native/TensorAdvancedIndexing.cpp index 408faea1b7644..7d613fc023120 100644 --- a/aten/src/ATen/native/TensorAdvancedIndexing.cpp +++ b/aten/src/ATen/native/TensorAdvancedIndexing.cpp @@ -2174,7 +2174,7 @@ static void _scatter_via_index_put( if (self.dim() == 1 || broadcast_index) { Tensor squeezed = index; if (broadcast_index && index.dim() > 1) { - for (const auto d : c10::irange(index.dim())) { + for (int64_t d = index.dim() - 1; d >= 0; --d) { if (d == dim) { continue; } diff --git a/test/test_scatter_gather_ops.py b/test/test_scatter_gather_ops.py index d2a0e8bd1ccca..ba967c142f1e7 100644 --- a/test/test_scatter_gather_ops.py +++ b/test/test_scatter_gather_ops.py @@ -383,13 +383,14 @@ def helper(input_size, idx_size): @dtypes(torch.float32) def test_scatter_add_broadcasted_index_deterministic(self, device, dtype): for d in (0, 1): - inp = torch.randn(3, 4, device=device, dtype=dtype) + inp = torch.randn(3, 4, 5, device=device, dtype=dtype) idx_1d = torch.randint(3, (10,), device=device) src_shape = list(inp.shape) src_shape[d] = 10 src = torch.randn(src_shape, device=device, dtype=dtype) - idx = idx_1d.unsqueeze(1 - d).expand(src_shape) - print(idx.stride()) + idx_view_shape = [1] * inp.ndim + idx_view_shape[d] = 10 + idx = idx_1d.view(idx_view_shape).expand(src_shape) ref = inp.clone().scatter_add_(d, idx, src) with DeterministicGuard(True): res = inp.clone().scatter_add_(d, idx, src) From a8c80f3fa9cff14d13eea51ae68426c1e9ed9086 Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Mon, 15 Sep 2025 11:31:37 +0000 Subject: [PATCH 242/693] Update slow tests (#162946) This PR is auto-generated weekly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/weekly.yml). Update the list of slow tests. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162946 Approved by: https://github.com/pytorchbot --- test/slow_tests.json | 489 ++++++++++++++++++++++--------------------- 1 file changed, 247 insertions(+), 242 deletions(-) diff --git a/test/slow_tests.json b/test/slow_tests.json index cd9d6864f0ec4..5a35d23776a31 100644 --- a/test/slow_tests.json +++ b/test/slow_tests.json @@ -1,244 +1,249 @@ { - "EndToEndLSTM (__main__.RNNTest)": 194.9510040283203, - "MultiheadAttention (__main__.ModulesTest)": 140.13499959309897, - "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 89.57710986667209, - "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 64.31833351982965, - "test_after_aot_gpu_runtime_error (__main__.MinifierIsolateTests)": 66.09833272298177, - "test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 64.02314267839704, - "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 72.13800048828125, - "test_aot_autograd_symbolic_exhaustive_masked_norm_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 63.19166692097982, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 153.9259999593099, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 214.78533426920572, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 158.7769978841146, - "test_aot_autograd_symbolic_exhaustive_nn_functional_unfold_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 60.201476414998375, - "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 75.8566665649414, - "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 158.88999938964844, - "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 600.0303955078125, - "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 143.89337348937988, - "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 494.34210883246527, - "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 504.5401102701823, - "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 135.9231694539388, - "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 71.03799947102864, - "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 73.23316764831543, - "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 214.73055691189236, - "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 150.5653305053711, - "test_cat_2k_args (__main__.TestTEFuserDynamic)": 121.138150700114, - "test_cat_2k_args (__main__.TestTEFuserStatic)": 117.27021219874874, - "test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 332.1435546875, - "test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 413.1364440917969, - "test_collect_callgrind (__main__.TestBenchmarkUtils)": 322.539549085829, - "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 109.46066538492839, - "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 110.44916661580403, - "test_comprehensive_diff_cuda_float32 
(__main__.TestDecompCUDA)": 77.25650024414062, - "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 75.41433461507161, - "test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 111.43533325195312, - "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 113.98733520507812, - "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 485.4573465983073, - "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 464.56699625651044, - "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 265.6348292032878, - "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 314.0461654663086, - "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1546.3898315429688, - "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 69.4828332265218, - "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1384.938496907552, - "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 73.32633463541667, - "test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 78.70183436075847, - "test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 76.88016764322917, - "test_comprehensive_linalg_pinv_singular_cuda_complex128 (__main__.TestDecompCUDA)": 60.60533459981283, - "test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 83.5096664428711, - "test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 78.69066619873047, - "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 92.91299947102864, - "test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 73.34999974568684, - "test_comprehensive_linalg_vector_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 70.28683344523112, - "test_comprehensive_linalg_vector_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.44366518656413, - "test_comprehensive_logspace_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 77.09783299763997, - "test_comprehensive_logspace_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 70.4760004679362, - "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 142.64183044433594, - "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 137.7250010172526, - "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 138.17566553751627, - "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 69.95266660054524, - "test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDecompCPU)": 60.835333506266274, - "test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float64 (__main__.TestDecompCPU)": 66.94753379821778, - "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 138.8831672668457, - "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 157.37983194986978, - "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 148.48499552408853, - "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 142.54666646321616, - "test_comprehensive_nn_functional_grid_sample_cuda_bfloat16 (__main__.TestDecompCUDA)": 66.76000086466472, - "test_comprehensive_nn_functional_grid_sample_cuda_float16 
(__main__.TestDecompCUDA)": 70.30716641743977, - "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 340.98316701253253, - "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 314.614995320638, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 88.2018330891927, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 85.09549967447917, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 88.72550201416016, - "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 85.59499867757161, - "test_comprehensive_nn_functional_interpolate_trilinear_cpu_float32 (__main__.TestDecompCPU)": 61.82139994303385, - "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 141.1143341064453, - "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 142.72383499145508, - "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1356.413838704427, - "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1347.1215209960938, - "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1366.5043131510417, - "test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 593.5763346354166, - "test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 549.9474945068359, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 74.53666687011719, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 75.8316650390625, - "test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 74.80666669209798, - "test_comprehensive_nn_functional_unfold_cuda_complex128 (__main__.TestDecompCUDA)": 67.3658332824707, - "test_comprehensive_ormqr_cpu_complex64 (__main__.TestDecompCPU)": 67.6716677347819, - "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 120.74283218383789, - "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 117.90700022379558, - "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 74.16149965922038, - "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 74.09249877929688, - "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 68.72949981689453, - "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 76.05216598510742, - "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 79.25549952189128, - "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 124.02233123779297, - "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 130.15816497802734, - "test_constructor_autograd_SparseCSC_cuda (__main__.TestSparseAnyCUDA)": 114.52783139546712, - "test_constructor_autograd_SparseCSR_cuda (__main__.TestSparseAnyCUDA)": 94.13066546122234, - "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 243.25878143310547, - "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 560.9872216118706, - "test_conv2d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 85.30400085449219, - "test_conv2d_binary_dynamic_shapes_cpu 
(__main__.TestDynamicPatternMatcherGenericCPU)": 60.0622667948405, - "test_conv2d_unary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 60.94093297322591, - "test_conv3d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 164.94733174641928, - "test_conv3d_binary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 67.41599782307942, - "test_conv_bn_fuse_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 80.62599987453885, - "test_conv_unary_fusion_nnc (__main__.TestMkldnnFusion)": 77.90822347005208, - "test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 88.02899932861328, - "test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 83.99416732788086, - "test_count_nonzero_all (__main__.TestBool)": 625.3162163628472, - "test_custom_module_lstm (__main__.TestQuantizedOps)": 691.5127597384983, - "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 86.18333435058594, - "test_eager_sequence_nr_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 146.76594623766448, - "test_eig_check_magma_cuda_float32 (__main__.TestLinalgCUDA)": 341.765677134196, - "test_fail_arithmetic_ops.py (__main__.TestTyping)": 68.25488874647353, - "test_fail_random.py (__main__.TestTyping)": 69.70459224559643, - "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 99.30016708374023, - "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 90.32933298746745, - "test_fuse_large_params_cpu (__main__.CpuTests)": 100.9027509689331, - "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 156.06466674804688, - "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 154.44311014811197, - "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 140.33400217692056, - "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 108.87950007120769, - "test_grad_nn_Transformer_cpu_float64 (__main__.TestModuleCPU)": 78.21525671543219, - "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 95.37383270263672, - "test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 124.23833465576172, - "test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 130.07466634114584, - "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 228.14850107828775, - "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 141.07866414388022, - "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 155.69166564941406, - "test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 638.5084838867188, - "test_group_norm (__main__.TestQuantizedOps)": 235.64022382100424, - "test_indirect_device_assert (__main__.TritonCodeGenTests)": 328.87933349609375, - "test_inductor_dynamic_shapes_broadcasting_dynamic_shapes (__main__.DynamicShapesReproTests)": 116.18105255930047, - "test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 70.07888836330838, - "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 89.06283315022786, - "test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 131.60088857014975, - "test_jit_cuda_archflags 
(__main__.TestCppExtensionJIT)": 118.61966451009114, - "test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 131.74433390299478, - "test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 101.52466583251953, - "test_linear (__main__.TestStaticQuantizedModule)": 219.97832912868924, - "test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 111.1229985555013, - "test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 139.29833475748697, - "test_linear_relu (__main__.TestStaticQuantizedModule)": 222.60332700941296, - "test_lobpcg_ortho_cuda_float64 (__main__.TestLinalgCUDA)": 137.30917072296143, - "test_longformer_chunk_dynamic_shapes (__main__.DynamicShapesReproTests)": 106.62766689724393, - "test_low_memory_max_pool_dilation_1_dim_3_cpu_halide (__main__.HalideCpuTests)": 585.4219970703125, - "test_low_memory_max_pool_dilation_2_dim_3_cpu_halide (__main__.HalideCpuTests)": 504.6419982910156, - "test_lstm_cpu (__main__.TestMkldnnCPU)": 69.61133321126302, - "test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 127.47244517008464, - "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 63.23977788289388, - "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 63.10499954223633, - "test_nan_assert_float16 (__main__.ProcessGroupNCCLGroupTest)": 105.55233224232991, - "test_pattern_matcher_multi_user_cpu (__main__.CpuTritonTests)": 148.99966939290366, - "test_proper_exit (__main__.TestDataLoader)": 195.07049942016602, - "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 238.3838322957357, - "test_qat_conv2d_unary (__main__.TestQuantizePT2EX86Inductor)": 180.44411044650607, - "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn1d)": 64.31058961917192, - "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn2d)": 62.13955030441284, - "test_qat_mobilenet_v2 (__main__.TestQuantizePT2EQATModels)": 141.32811228434244, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 92.34100087483723, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 84.88599904378255, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 77.63999938964844, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 91.23133341471355, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 88.41600036621094, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 75.7643305460612, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 85.55433400472005, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 86.17699940999348, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 76.47133382161458, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper 
(__main__.TestCppWrapper)": 98.72666676839192, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 102.08499908447266, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 79.43900044759114, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 87.4413324991862, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 88.52833302815755, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 91.18200174967448, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 91.71099853515625, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 75.84733327229817, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 89.47599792480469, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 89.17300160725911, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 96.56466674804688, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 92.08200073242188, - "test_qrnncell (__main__.TestDynamicQuantizedOps)": 200.46322377522787, - "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 637.5349934895834, - "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 1213.9888509114583, - "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 759.4036661783854, - "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1672.4736735026042, - "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 76.77566528320312, - "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 292.51483662923175, - "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 129.11066691080728, - "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 260.64366658528644, - "test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 73.24966684977214, - "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 157.60366821289062, - "test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 78.70783360799153, - "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 89.36199951171875, - "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 193.34283447265625, - "test_quick_core_backward_std_cpu_float64 (__main__.TestDecompCPU)": 64.08739941914877, - "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 126.64083353678386, - "test_register_spills_cuda (__main__.BenchmarkFusionCudaTest)": 106.82166735331218, - "test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 
64.22033437093098, - "test_rosenbrock_sparse_with_lrsched_False_SGD_cuda_float64 (__main__.TestOptimRenewedCUDA)": 65.57016626993816, - "test_rosenbrock_sparse_with_lrsched_True_SGD_cuda_float64 (__main__.TestOptimRenewedCUDA)": 76.09683354695638, - "test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 71.15816752115886, - "test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 74.32677883572049, - "test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 157.43183390299478, - "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 131.13233439127603, - "test_sdpa_kernel_ctx_manager2_dynamic_shapes (__main__.DynamicShapesCtxManagerTests)": 160.5550011528863, - "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 117.62710995144315, - "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 114.96744452582465, - "test_std (__main__.TestQuantizedOps)": 275.08810419506494, - "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 150.82900087038675, - "test_terminate_handler_on_crash (__main__.TestTorch)": 110.43555479579501, - "test_terminate_signal (__main__.ForkTest)": 130.07055732442274, - "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 129.6981106830968, - "test_terminate_signal (__main__.SpawnTest)": 133.48411263359918, - "test_torchvision_smoke (__main__.TestTensorBoardPytorchGraph)": 90.4521090189616, - "test_train_parity_multi_group (__main__.TestFullyShard1DTrainingCore)": 164.04612350463867, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 77.9958324432373, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 78.84283447265625, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 79.08466720581055, - "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 127.43616739908855, - "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 129.390500386556, - "test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 104.55349795023601, - "test_unary_ops (__main__.TestTEFuserDynamic)": 84.59466772609287, - "test_unary_ops (__main__.TestTEFuserStatic)": 87.30733429061041, - "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 82.17999776204427, - "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 79.73050053914388, - "test_views1_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 87.70950190226237, - "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 96.42566680908203, - "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cuda_float32 (__main__.TestOperatorsCUDA)": 78.90966542561848, - "test_vmapjvpvjp_linalg_lu_solve_cpu_float32 (__main__.TestOperatorsCPU)": 62.53285598754883, - "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 91.11416816711426, - "test_vmapjvpvjp_linalg_multi_dot_cuda_float32 (__main__.TestOperatorsCUDA)": 86.59666760762532, - "test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 93.32300059000652, - "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 100.57566833496094, - "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 
(__main__.TestOperatorsCUDA)": 116.00733248392741, - "test_vmapjvpvjp_nn_functional_conv2d_cpu_float32 (__main__.TestOperatorsCPU)": 62.26690483093262, - "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 87.44200134277344, - "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 133.6548334757487, - "test_vmapjvpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 114.57983334859212, - "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 69.25033442179362, - "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 124.68766911824544, - "test_vmapvjpvjp_linalg_lstsq_cuda_float32 (__main__.TestOperatorsCUDA)": 76.81024932861328, - "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 140.70899963378906, - "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 118.22750091552734, - "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 181.27366256713867 + "EndToEndLSTM (__main__.RNNTest)": 197.77900187174478, + "MultiheadAttention (__main__.ModulesTest)": 137.42000325520834, + "test_AllenaiLongformerBase_repro_cpu_halide (__main__.HalideCpuTests)": 214.1816660563151, + "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 91.37688869900174, + "test_adaptive_max_pool2d1_cpu_halide (__main__.HalideCpuTests)": 116.57933298746745, + "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 66.92922253078885, + "test_after_aot_gpu_runtime_error (__main__.MinifierIsolateTests)": 65.68500010172527, + "test_alexnet_prefix_cpu_halide (__main__.HalideCpuTests)": 177.91966756184897, + "test_aot_autograd_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 87.69499969482422, + "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 74.02233378092448, + "test_aot_autograd_symbolic_exhaustive_masked_norm_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 64.45699946085612, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 136.27599589029947, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 259.30466715494794, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 135.36400095621744, + "test_aot_autograd_symbolic_exhaustive_nn_functional_unfold_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 61.07166544596354, + "test_aot_autograd_symbolic_exhaustive_ormqr_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 64.8491905757359, + "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 90.34733327229817, + "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 140.09266916910806, + "test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_False_cpu (__main__.AssociativeScanTests)": 65.17999935150146, + "test_associative_scan_partial_grad_combine_mode_generic_compile_mode_compile_dynamic_shape_reverse_True_cpu (__main__.AssociativeScanTests)": 73.75112533569336, + "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 646.9324035644531, + "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 142.86450004577637, + "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 
493.49299791124133, + "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 498.72944810655383, + "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 133.2033322652181, + "test_avg_pool3d_backward_cpu_halide (__main__.HalideCpuTests)": 61.788333892822266, + "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 69.57333119710286, + "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 81.06516774495442, + "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 215.5933346218533, + "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 135.41816584269205, + "test_checkpointing_without_reentrant_input_requires_grad_False (__main__.TestAutogradWithCompiledAutograd)": 338.17533026801215, + "test_checkpointing_without_reentrant_input_requires_grad_True (__main__.TestAutogradWithCompiledAutograd)": 423.4767761230469, + "test_collect_callgrind (__main__.TestBenchmarkUtils)": 325.6485578748915, + "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 111.10633341471355, + "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 104.33766555786133, + "test_comprehensive_diff_cuda_float32 (__main__.TestDecompCUDA)": 69.72683334350586, + "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 71.48199971516927, + "test_comprehensive_grid_sampler_2d_cpu_bfloat16 (__main__.TestDecompCPU)": 96.58033243815105, + "test_comprehensive_grid_sampler_2d_cpu_float16 (__main__.TestDecompCPU)": 96.65433247884114, + "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 464.92467244466144, + "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 460.3839925130208, + "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 263.58483632405597, + "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 298.0318349202474, + "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1310.3350016276042, + "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 66.3976656595866, + "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1316.084981282552, + "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.58183288574219, + "test_comprehensive_linalg_lu_solve_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 78.05749893188477, + "test_comprehensive_linalg_lu_solve_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 72.31333287556966, + "test_comprehensive_linalg_solve_triangular_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 74.53133392333984, + "test_comprehensive_linalg_solve_triangular_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 82.40500005086263, + "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 69.91749890645345, + "test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 70.98916562398274, + "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 126.90333302815755, + "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 112.40283330281575, + "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 114.09550094604492, + "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 63.223000049591064, + "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 
(__main__.TestDecompCUDA)": 67.44083213806152, + "test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDecompCPU)": 62.70066706339518, + "test_comprehensive_nn_functional_gaussian_nll_loss_cpu_float64 (__main__.TestDecompCPU)": 60.468666076660156, + "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 116.34999974568684, + "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 116.57566579182942, + "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 115.4306640625, + "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 114.67599741617839, + "test_comprehensive_nn_functional_grid_sample_cuda_bfloat16 (__main__.TestDecompCUDA)": 78.96566772460938, + "test_comprehensive_nn_functional_grid_sample_cuda_float16 (__main__.TestDecompCUDA)": 60.72616704305013, + "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 270.3598327636719, + "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 260.6623306274414, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 88.48316701253255, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 78.13166681925456, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 83.55450057983398, + "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 80.67749913533528, + "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 136.17766698201498, + "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 157.4010009765625, + "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 1222.983662923177, + "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 1228.281494140625, + "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 1216.2643432617188, + "test_comprehensive_nn_functional_max_pool3d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 503.51465861002606, + "test_comprehensive_nn_functional_max_pool3d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 523.0736694335938, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 68.91749954223633, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 61.947166442871094, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 63.17983309427897, + "test_comprehensive_nn_functional_unfold_cuda_complex128 (__main__.TestDecompCUDA)": 77.92383321126302, + "test_comprehensive_nn_functional_unfold_cuda_complex64 (__main__.TestDecompCUDA)": 69.46137571334839, + "test_comprehensive_ormqr_cpu_complex64 (__main__.TestDecompCPU)": 62.2076670328776, + "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 139.3495012919108, + "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 124.99983469645183, + "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 73.96983273824056, + "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 73.27383422851562, + "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 80.94216791788737, + 
"test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 73.65583419799805, + "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 74.30566660563152, + "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 112.75583267211914, + "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 106.72283299763997, + "test_constructor_autograd_SparseCSC_cuda (__main__.TestSparseAnyCUDA)": 102.85349909464519, + "test_constructor_autograd_SparseCSR_cuda (__main__.TestSparseAnyCUDA)": 73.14683278401692, + "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 137.8197758992513, + "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 437.60955386691626, + "test_conv2d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 75.4076665242513, + "test_conv2d_binary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 62.40233357747396, + "test_conv3d_binary_broadcast_shapes_cpu (__main__.TestPatternMatcherGenericCPU)": 149.36666870117188, + "test_conv3d_binary_dynamic_shapes_cpu (__main__.TestDynamicPatternMatcherGenericCPU)": 72.90299987792969, + "test_conv_bn_fuse_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 81.56499862670898, + "test_conv_unary_fusion_nnc (__main__.TestMkldnnFusion)": 75.13744566175673, + "test_correctness_AdamW_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 82.20433298746745, + "test_correctness_Adam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 76.78600056966145, + "test_count_nonzero_all (__main__.TestBool)": 655.6186726888021, + "test_cpu_gpu_parity_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 80.43400009940652, + "test_custom_module_lstm (__main__.TestQuantizedOps)": 798.5362040201823, + "test_ddp_uneven_inputs (__main__.TestDistBackendWithSpawn)": 360.75275349617004, + "test_diff_hyperparams_sharding_strategy_str_no_shard (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 60.4433339436849, + "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 85.3961664835612, + "test_dtensor_op_db_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDTensorOpsCPU)": 93.10799916585286, + "test_eig_check_magma_cuda_float32 (__main__.TestLinalgCUDA)": 215.1919957002004, + "test_error_detection_and_propagation (__main__.NcclErrorHandlingTest)": 67.04866790771484, + "test_fail_arithmetic_ops.py (__main__.TestTyping)": 64.6271112230089, + "test_fail_creation_ops.py (__main__.TestTyping)": 71.04431086573108, + "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 88.46849950154622, + "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 107.12216822306316, + "test_fuse_large_params_cpu (__main__.CpuTests)": 80.30040054321289, + "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 162.87633260091147, + "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 160.84833441840277, + "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 153.62799580891928, + "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 120.26516850789388, + "test_grad_nn_Transformer_cpu_float64 (__main__.TestModuleCPU)": 62.87366739908854, + "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 104.12133407592773, + "test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 
117.95999908447266, + "test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 113.97000122070312, + "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 248.1183293660482, + "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 180.4351666768392, + "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 160.81400299072266, + "test_gradgrad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 694.055165608724, + "test_grid_sampler_2d_cpu_halide (__main__.HalideCpuTests)": 194.28900146484375, + "test_group_norm (__main__.TestQuantizedOps)": 207.3484410179986, + "test_indirect_device_assert (__main__.TritonCodeGenTests)": 329.52866617838544, + "test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 67.15944459703233, + "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 84.40099970499675, + "test_inputs_overlapping_with_mutation_stress_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 132.7371097140842, + "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 118.91166687011719, + "test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 130.4806671142578, + "test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 101.25733184814453, + "test_linear (__main__.TestStaticQuantizedModule)": 131.34678183661566, + "test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 124.32133229573567, + "test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 126.89633433024089, + "test_linear_relu (__main__.TestStaticQuantizedModule)": 128.11266708374023, + "test_lobpcg_ortho_cuda_float64 (__main__.TestLinalgCUDA)": 75.69916741053264, + "test_longformer_chunk_dynamic_shapes (__main__.DynamicShapesReproTests)": 106.60366736518012, + "test_lstm_cpu (__main__.TestMkldnnCPU)": 66.15800094604492, + "test_many_overlapping_inputs_does_not_explode_guards_dynamic_shapes (__main__.DynamicShapesReproTests)": 130.17633226182727, + "test_max_autotune_addmm_max_autotune_gemm_backends_CK_x_shape2 (__main__.TestCKBackend)": 60.61724901199341, + "test_max_autotune_addmm_search_space_EXHAUSTIVE_dynamic_True (__main__.TestMaxAutotuneSubproc)": 82.76533508300781, + "test_max_autotune_precompile_matmul_max_autotune_gemm_backends_CKTILE_autotune_in_subproc_False_use_aoti_False (__main__.TestCKBackend)": 84.80249977111816, + "test_max_autotune_precompile_matmul_max_autotune_gemm_backends_CKTILE_autotune_in_subproc_True_use_aoti_False (__main__.TestCKBackend)": 82.48874931409955, + "test_max_pool2d2_cpu_halide (__main__.HalideCpuTests)": 421.6166585286458, + "test_max_pool2d3_cpu_halide (__main__.HalideCpuTests)": 133.6796671549479, + "test_max_pool2d5_cpu_halide (__main__.HalideCpuTests)": 357.6593322753906, + "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 63.8608890109592, + "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 64.60900031195746, + "test_proper_exit (__main__.TestDataLoader)": 223.7907740275065, + "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 213.6155548095703, + "test_qat_conv2d_unary (__main__.TestQuantizePT2EX86Inductor)": 168.48199971516928, + "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn1d)": 68.48926869834342, + "test_qat_conv_bn_fusion_no_conv_bias 
(__main__.TestQuantizePT2EQAT_ConvBn2d)": 68.39782928838963, + "test_qat_mobilenet_v2 (__main__.TestQuantizePT2EQATModels)": 99.70321994357639, + "test_qat_resnet18 (__main__.TestQuantizePT2EQATModels)": 61.103378822063576, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 99.00533294677734, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 100.10599772135417, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 75.0443344116211, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 91.9883321126302, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 100.07866668701172, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 68.79566701253255, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 90.1106669108073, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 88.92966969807942, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 75.10766855875652, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 103.41666666666667, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 96.1106669108073, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 77.91766866048177, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 92.16766611735027, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 92.9856669108073, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 93.22266642252605, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 95.57533264160156, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False (__main__.TestPatternMatcher)": 70.04799906412761, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 90.56433359781902, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 92.017333984375, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 94.46166737874348, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 95.06233215332031, + "test_qrnncell (__main__.TestDynamicQuantizedOps)": 204.8830050362481, + "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 584.1243489583334, + 
"test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 1194.274678548177, + "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 842.1573282877604, + "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1500.2438354492188, + "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 80.01266479492188, + "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 304.8406728108724, + "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 123.26833089192708, + "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 289.4941685994466, + "test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 78.4913330078125, + "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 160.19433085123697, + "test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 76.93316650390625, + "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 95.25599924723308, + "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 190.9510014851888, + "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 115.96716562906902, + "test_register_spills_cuda (__main__.BenchmarkFusionCudaTest)": 85.82816696166992, + "test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 64.81233215332031, + "test_runtime_checks_large_cpu (__main__.AOTInductorTestABICompatibleCpu)": 73.0594991048177, + "test_runtime_checks_large_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 78.28866704305013, + "test_runtime_checks_large_cuda (__main__.AOTInductorTestABICompatibleGpu)": 203.66749827067056, + "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 118.92166392008464, + "test_sdpa_kernel_ctx_manager2_dynamic_shapes (__main__.DynamicShapesCtxManagerTests)": 161.21966722276477, + "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 119.33677842881944, + "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 122.50711229112413, + "test_sort_stable_cpu (__main__.CpuTritonTests)": 77.22933451334636, + "test_split_cumsum_cpu (__main__.CpuTritonTests)": 89.92000071207683, + "test_std (__main__.TestQuantizedOps)": 118.49511219395532, + "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 149.61699732144675, + "test_tensor_split (__main__.TestVmapOperators)": 83.01314294423376, + "test_terminate_handler_on_crash (__main__.TestTorch)": 111.18021970325046, + "test_terminate_signal (__main__.ForkTest)": 131.81088901807865, + "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 131.90911058253712, + "test_terminate_signal (__main__.SpawnTest)": 135.51344219843546, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 71.71866671244304, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 69.4015007019043, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 75.85683250427246, + "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 144.25, + "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 142.70416514078775, + 
"test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 105.90866597493489, + "test_unary_ops (__main__.TestTEFuserDynamic)": 83.01277730200026, + "test_unary_ops (__main__.TestTEFuserStatic)": 84.06699878639645, + "test_upsample_bicubic2d_cpu_halide (__main__.HalideCpuTests)": 97.28433227539062, + "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 96.625, + "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 78.01066716512044, + "test_views1_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 82.23649978637695, + "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cpu_float32 (__main__.TestOperatorsCPU)": 100.44966379801433, + "test_vmapjvpvjp_linalg_lstsq_grad_oriented_cuda_float32 (__main__.TestOperatorsCUDA)": 78.67900085449219, + "test_vmapjvpvjp_linalg_lu_solve_cpu_float32 (__main__.TestOperatorsCPU)": 75.2140007019043, + "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 100.80166753133138, + "test_vmapjvpvjp_linalg_multi_dot_cuda_float32 (__main__.TestOperatorsCUDA)": 96.56916745503743, + "test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 99.54433314005534, + "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 69.86966705322266, + "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 103.45650100708008, + "test_vmapjvpvjp_nn_functional_conv2d_cpu_float32 (__main__.TestOperatorsCPU)": 69.28766759236653, + "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 70.02966690063477, + "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 100.93566703796387, + "test_vmapjvpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 94.60433260599773, + "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 98.65516599019368, + "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 105.35816828409831, + "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 74.68983332316081, + "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 152.76449966430664 } \ No newline at end of file From 456fbeaa6dd2736c50db5ee50963268c3f590c02 Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Mon, 15 Sep 2025 11:41:59 +0000 Subject: [PATCH 243/693] [xla hash update] update the pinned xla hash (#162947) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned xla hash. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162947
Approved by: https://github.com/pytorchbot
---
 .github/ci_commit_pins/xla.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt
index eb335eb9d64d5..504d924ec7641 100644
--- a/.github/ci_commit_pins/xla.txt
+++ b/.github/ci_commit_pins/xla.txt
@@ -1 +1 @@
-6c5478ff7c3d50dd1e3047d72ec5909bea474073
+c77852e117bdf056c8e9a087e51d6f65cf6ba53d

From 09cbf34e9386821a2a72990a6b4870f27bc129fc Mon Sep 17 00:00:00 2001
From: albanD
Date: Mon, 15 Sep 2025 13:29:43 +0000
Subject: [PATCH 244/693] [BE] Preserve caller source location in the error message (#162808)

Summary:
Currently, C10_CUDA_CHECK only reports a source location inside CUDAException itself, like below:
```
Exception raised from c10_cuda_check_implementation at fbcode/caffe2/c10/cuda/CUDAException.cpp:44
```
which is not terribly useful.

Checking the original diff D39619861 that introduced c10_cuda_check_implementation, it appears the original macro reported the source location correctly, but c10_cuda_check_implementation broke it. This diff propagates the caller's source location down to c10_cuda_check_implementation to fix the issue.

Test Plan: CI

Observed the desired error message after the change:
```
CUDA error: an illegal memory access was encountered
Search for `cudaErrorIllegalAddress' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information.
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Device-side assertion tracking was not enabled by user.
Exception raised from operator() at fbcode/sigrid/predictor/aed/AedContainer.cpp:659 (most recent call first):
```
Note that the last line now reports the actual caller location.

Rollback Plan:

Reviewed By: Raymo111

Differential Revision: D81880552

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162808
Approved by: https://github.com/janeyx99
---
 c10/cuda/CUDAException.cpp | 8 ++++----
 c10/cuda/CUDAException.h   | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/c10/cuda/CUDAException.cpp b/c10/cuda/CUDAException.cpp
index 457d35f020bbe..4e4419b4369a8 100644
--- a/c10/cuda/CUDAException.cpp
+++ b/c10/cuda/CUDAException.cpp
@@ -10,9 +10,9 @@ namespace c10::cuda {
 
 void c10_cuda_check_implementation(
     const int32_t err,
-    const char* /*filename*/,
-    const char* /*function_name*/,
-    const int /*line_number*/,
+    const char* filename,
+    const char* function_name,
+    const uint32_t line_number,
     const bool include_device_assertions) {
   const auto cuda_error = static_cast<cudaError_t>(err);
   const auto cuda_kernel_failure = include_device_assertions
@@ -41,7 +41,7 @@ void c10_cuda_check_implementation(
     }
 #endif
   throw c10::AcceleratorError(
-      {__func__, __FILE__, int32_t(__LINE__)}, err, check_message);
+      {function_name, filename, line_number}, err, check_message);
 }
 
 } // namespace c10::cuda
diff --git a/c10/cuda/CUDAException.h b/c10/cuda/CUDAException.h
index 899d85e8a73f6..2503b22e4765b 100644
--- a/c10/cuda/CUDAException.h
+++ b/c10/cuda/CUDAException.h
@@ -91,7 +91,7 @@ C10_CUDA_API void c10_cuda_check_implementation(
     const int32_t err,
     const char* filename,
     const char* function_name,
-    const int line_number,
+    const uint32_t line_number,
     const bool include_device_assertions);
 
 } // namespace c10::cuda

From 7d1bcd9aea8f48733ea46d496e945b7f2592a585 Mon Sep 17 00:00:00 2001
From: James Wu
Date: Sun, 14 Sep 2025 12:47:38 -0700
Subject: [PATCH 245/693] [easy] Fix unsigned long issue in static cuda launcher (#162920)

Fixes https://github.com/pytorch/pytorch/issues/162430

It's a bit hard to come up with a unit test where the stream exceeds a C++ long, so this just relies on the existing unit tests for now.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162920
Approved by: https://github.com/Skylion007, https://github.com/jansel
---
 torch/csrc/inductor/static_cuda_launcher.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torch/csrc/inductor/static_cuda_launcher.cpp b/torch/csrc/inductor/static_cuda_launcher.cpp
index 35756b704faa9..59916b6763bfa 100644
--- a/torch/csrc/inductor/static_cuda_launcher.cpp
+++ b/torch/csrc/inductor/static_cuda_launcher.cpp
@@ -369,7 +369,7 @@ PyObject* launch_kernel(PyObject* self, PyObject* args) {
   // Parse the fixed arguments and the format string
   if (!PyArg_ParseTuple(
           args,
-          "KiiiiisOl",
+          "KiiiiisOK",
           &func_ptr,
           &gridX,
           &gridY,

From 70337a066fd1de717318eaca0a377f4bd5717ece Mon Sep 17 00:00:00 2001
From: James Wu
Date: Sun, 14 Sep 2025 19:58:52 +0000
Subject: [PATCH 246/693] [easy] Handle Autotuners in get_triton_source_codes_for_gm (#161914)

Some Triton kernels are autotuners; in that case, grab the underlying function from the autotuner.
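For reference, a kernel decorated with `@triton.autotune` is an `Autotuner` object rather than a plain `JITFunction`; the sketch below (a made-up kernel with illustrative configs) shows the shape of what has to be unwrapped. The `Autotuner` import path and the `.fn` attribute are the ones this patch itself uses; everything else is hypothetical.

```python
import triton
import triton.language as tl


# A hypothetical user-defined kernel: stacking @triton.autotune on top of
# @triton.jit makes `scale_kernel` an Autotuner, not a plain JITFunction.
@triton.autotune(
    configs=[
        triton.Config({"BLOCK": 128}, num_warps=4),
        triton.Config({"BLOCK": 256}, num_warps=8),
    ],
    key=["n"],
)
@triton.jit
def scale_kernel(x_ptr, out_ptr, n, BLOCK: tl.constexpr):
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK + tl.arange(0, BLOCK)
    mask = offsets < n
    x = tl.load(x_ptr + offsets, mask=mask)
    tl.store(out_ptr + offsets, x * 2.0, mask=mask)


from triton.runtime.autotuner import Autotuner

# The cache needs the Triton source of the inner JITFunction, which the
# Autotuner exposes as `.fn` -- hence the `kernel = kernel.fn` unwrap below.
assert isinstance(scale_kernel, Autotuner)
jit_fn = scale_kernel.fn
```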
Pull Request resolved: https://github.com/pytorch/pytorch/pull/161914 Approved by: https://github.com/oulgen --- torch/_functorch/_aot_autograd/autograd_cache.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/torch/_functorch/_aot_autograd/autograd_cache.py b/torch/_functorch/_aot_autograd/autograd_cache.py index 248c3a0ae673e..2ac1e0d34d088 100644 --- a/torch/_functorch/_aot_autograd/autograd_cache.py +++ b/torch/_functorch/_aot_autograd/autograd_cache.py @@ -306,6 +306,8 @@ def get_triton_source_codes_from_gm( self, gm: torch.fx.GraphModule, ): + assert has_triton_package(), "Triton is not available" + triton_kernels = [] for module in gm.modules(): if not isinstance(module, torch.fx.GraphModule): @@ -331,6 +333,11 @@ def get_triton_source_codes_from_gm( ) for kernel in triton_kernels: + from triton.runtime.autotuner import Autotuner + + if isinstance(kernel, Autotuner): + # Grab the Inner JITFunction + kernel = kernel.fn source_codes = user_defined_triton_kernel_transitive_closure_source_code( kernel ) @@ -355,7 +362,8 @@ def __init__( [], [], ) - self.triton_kernel_source_codes = self.get_triton_source_codes_from_gm(gm) + if has_triton_package(): + self.triton_kernel_source_codes = self.get_triton_source_codes_from_gm(gm) if hasattr(gm, "saved_tensors_hooks_pack_0"): From c9e57d7e9f326e427fc4ae5c318fd017cd4b75a9 Mon Sep 17 00:00:00 2001 From: atalman Date: Mon, 15 Sep 2025 15:27:25 +0000 Subject: [PATCH 247/693] [CI] Move libtorch-cpu-shared-with-deps-release-build to python 3.10 (#162877) Related to https://github.com/pytorch/pytorch/pull/162862 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162877 Approved by: https://github.com/malfet --- .ci/libtorch/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/libtorch/build.sh b/.ci/libtorch/build.sh index 54ddd905aad05..c2d67f8b1bb29 100644 --- a/.ci/libtorch/build.sh +++ b/.ci/libtorch/build.sh @@ -7,4 +7,4 @@ set -ex SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh +USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.10" ${SCRIPTPATH}/../manywheel/build.sh From 5dc4e78047bd4bdcf156ed7995e33b276ed2bd95 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Tue, 26 Aug 2025 11:16:13 -0700 Subject: [PATCH 248/693] Fix excess refcounting in ObjLoaderFunc (#161528) expectRef is preferred over expect because it doesn't copy a std::shared_ptr. 
Differential Revision: [D81053710](https://our.internmc.facebook.com/intern/diff/D81053710/) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161528 Approved by: https://github.com/Skylion007 --- torch/csrc/jit/serialization/import.cpp | 10 +++++----- torch/csrc/jit/serialization/pickler.cpp | 2 +- torch/csrc/jit/serialization/pickler_helper.cpp | 6 +++--- torch/csrc/jit/serialization/pickler_helper.h | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/torch/csrc/jit/serialization/import.cpp b/torch/csrc/jit/serialization/import.cpp index fa84ffcd68f08..dd27811d1028d 100644 --- a/torch/csrc/jit/serialization/import.cpp +++ b/torch/csrc/jit/serialization/import.cpp @@ -71,15 +71,15 @@ static void postSetStateValidate(const IValue& v) { c10::intrusive_ptr ObjLoaderFunc( const at::StrongTypePtr& type, IValue input) { - auto cls = type.type_->expect(); - auto qn = cls->name(); - size_t n = cls->numAttributes(); + const auto& cls = type.type_->expectRef(); + auto qn = cls.name(); + size_t n = cls.numAttributes(); if (checkHasValidSetGetState(cls)) { auto obj = c10::ivalue::Object::create(type, n); // XXX: Do not optimize __setstate__, so that we don't try to // specialize the class before it is initialized. GraphOptimizerEnabledGuard guard(false); - Function& set_state = cls->getMethod("__setstate__"); + Function& set_state = cls.getMethod("__setstate__"); // since we are in the middle of unpickling we might still have lists and // dicts that do not have accurate tags (e.g. they report they are // List[Any]). But we need to run __setstate__ which will check the input @@ -96,7 +96,7 @@ c10::intrusive_ptr ObjLoaderFunc( auto dict = std::move(input).toGenericDict(); auto obj = c10::ivalue::Object::create(type, n); for (const auto i : c10::irange(n)) { - obj->setSlot(i, dict.at(cls->getAttributeName(i))); + obj->setSlot(i, dict.at(cls.getAttributeName(i))); } return obj; } diff --git a/torch/csrc/jit/serialization/pickler.cpp b/torch/csrc/jit/serialization/pickler.cpp index 2dc3f138ff76d..0622dbb5cd98e 100644 --- a/torch/csrc/jit/serialization/pickler.cpp +++ b/torch/csrc/jit/serialization/pickler.cpp @@ -110,7 +110,7 @@ void Pickler::pushIValueImpl(const IValue& ivalue) { pushGlobal(type_name.prefix(), type_name.name()); push(PickleOpCode::EMPTY_TUPLE); push(PickleOpCode::NEWOBJ); - if (checkHasValidSetGetState(type)) { + if (checkHasValidSetGetState(*type)) { Function& getstate = type->getMethod("__getstate__"); pushIValue(getstate({obj})); } else { diff --git a/torch/csrc/jit/serialization/pickler_helper.cpp b/torch/csrc/jit/serialization/pickler_helper.cpp index 261ae15d36e0a..66b51b07f8074 100644 --- a/torch/csrc/jit/serialization/pickler_helper.cpp +++ b/torch/csrc/jit/serialization/pickler_helper.cpp @@ -34,9 +34,9 @@ WriteableTensorData getWriteableTensorData( return result; } -bool checkHasValidSetGetState(const std::shared_ptr& cls) { +bool checkHasValidSetGetState(const c10::ClassType& cls) { // Check that the schemas for __getstate__ and __setstate__ are correct - auto getstate = cls->findMethod("__getstate__"); + auto getstate = cls.findMethod("__getstate__"); if (getstate == nullptr) { return false; } @@ -56,7 +56,7 @@ bool checkHasValidSetGetState(const std::shared_ptr& cls) { // Check __setstate__ if the method exists // __setstate__ is expected to be (self, T) -> None - auto setstate = cls->findMethod("__setstate__"); + auto setstate = cls.findMethod("__setstate__"); if (!setstate) { return false; } diff --git 
a/torch/csrc/jit/serialization/pickler_helper.h b/torch/csrc/jit/serialization/pickler_helper.h index 9a52585254eb1..b27d974a10e90 100644 --- a/torch/csrc/jit/serialization/pickler_helper.h +++ b/torch/csrc/jit/serialization/pickler_helper.h @@ -112,7 +112,7 @@ getWriteableTensorData(const at::Tensor& tensor, bool to_cpu = true); // if the cls has __getstate__/__setstate__ // assert they have the right schema and return true, // otherwise return false -bool checkHasValidSetGetState(const std::shared_ptr& cls); +bool checkHasValidSetGetState(const c10::ClassType& cls); // Declare BackendMeta serialization and deserialization function pointer types. using BackendMetaPtr = std::function< From 0826aafa04be44e60ae15617dc0c2501ad91d755 Mon Sep 17 00:00:00 2001 From: Aaryaman Vasishta Date: Mon, 15 Sep 2025 16:13:03 +0000 Subject: [PATCH 249/693] [ROCm/Windows] Support aotriton for scaled_dot_product_attention on Windows. (#162330) Enables flash attention and/or memory efficient attention on Windows with scaled_dot_product_attention via. aotriton. Already tested to be working on Windows with TheRock. Steps to enable: simply set `USE_FLASH_ATTENTION=1` and `USE_MEM_EFF_ATTENTION=1` as usual. See https://github.com/ROCm/TheRock/blob/main/external-builds/pytorch/build_prod_wheels.py#L578-L604 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162330 Approved by: https://github.com/jeffdaily Co-authored-by: Scott Todd --- CMakeLists.txt | 4 +- .../native/transformers/cuda/attention.cu | 66 ++++++++++ .../transformers/hip/flash_attn/flash_api.h | 39 +----- cmake/External/aotriton.cmake | 113 +++++++++++++++++- tools/linter/dictionary.txt | 1 + 5 files changed, 179 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b0e87b108e7a..f56a5653e70f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -874,7 +874,7 @@ cmake_dependent_option( "Whether to build the flash_attention kernel for scaled dot product attention.\ Will be disabled if not supported by the platform" ON - "USE_CUDA OR USE_ROCM;NOT MSVC" + "(USE_CUDA AND NOT MSVC) OR USE_ROCM" OFF) cmake_dependent_option( @@ -909,7 +909,7 @@ cmake_dependent_option( # USE_FLASH_ATTENTION -> USE_ROCM -> Dependencies.cmake -> aotriton.cmake # if(USE_ROCM) - if(UNIX AND (USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION)) + if(USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION) include(cmake/External/aotriton.cmake) endif() endif() diff --git a/aten/src/ATen/native/transformers/cuda/attention.cu b/aten/src/ATen/native/transformers/cuda/attention.cu index b8b43e0086c1a..c2193f2378dd5 100644 --- a/aten/src/ATen/native/transformers/cuda/attention.cu +++ b/aten/src/ATen/native/transformers/cuda/attention.cu @@ -95,6 +95,72 @@ #endif #endif +#if defined(USE_ROCM) && (defined(USE_FLASH_ATTENTION) || defined(USE_MEM_EFF_ATTENTION)) +namespace pytorch_flash +{ +std::tuple< + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor, + at::Tensor> +mha_fwd( + const at::Tensor& q, // batch_size x seqlen_q x num_heads x head_size + const at::Tensor& k, // batch_size x seqlen_k x num_heads_k x head_size + const at::Tensor& v, // batch_size x seqlen_k x num_heads_k x head_size + std::optional& + out_, // batch_size x seqlen_q x num_heads x head_size + std::optional& + alibi_slopes_, // num_heads or batch_size x num_heads + const float p_dropout, + const float softmax_scale, + bool is_causal, + std::optional window_size_left, + std::optional window_size_right, + const float softcap, + const bool 
return_softmax, + std::optional gen_) { +#if defined(USE_ROCM_CK_SDPA) + if (at::globalContext().getROCmFAPreferredBackend() == + at::ROCmFABackend::Ck) { + const int non_null_window_left = window_size_left.value_or(-1); + const int non_null_window_right = window_size_right.value_or(-1); + std::optional dummy_attn_bias = std::nullopt; + return mha_fwd_ck( + q, + k, + v, + out_, + p_dropout, + softmax_scale, + is_causal, + non_null_window_left, + non_null_window_right, + return_softmax, + gen_, + dummy_attn_bias); // Not used in flash attention + } +#endif + return mha_fwd_aot( + q, + k, + v, + out_, + alibi_slopes_, + p_dropout, + softmax_scale, + is_causal, + window_size_left, + window_size_right, + return_softmax, + gen_); +} +} +#endif + namespace at { namespace cuda::philox { diff --git a/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h b/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h index f6f2240d4f091..71a1959065970 100644 --- a/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h +++ b/aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h @@ -270,7 +270,7 @@ std::tuple mha_varle #endif TORCH_API -inline std::tuple< +std::tuple< at::Tensor, at::Tensor, at::Tensor, @@ -294,42 +294,7 @@ mha_fwd( std::optional window_size_right, const float softcap, const bool return_softmax, - std::optional gen_) { -#if defined(USE_ROCM_CK_SDPA) - if (at::globalContext().getROCmFAPreferredBackend() == - at::ROCmFABackend::Ck) { - const int non_null_window_left = window_size_left.value_or(-1); - const int non_null_window_right = window_size_right.value_or(-1); - std::optional dummy_attn_bias = std::nullopt; - return mha_fwd_ck( - q, - k, - v, - out_, - p_dropout, - softmax_scale, - is_causal, - non_null_window_left, - non_null_window_right, - return_softmax, - gen_, - dummy_attn_bias); // Not used in flash attention - } -#endif - return mha_fwd_aot( - q, - k, - v, - out_, - alibi_slopes_, - p_dropout, - softmax_scale, - is_causal, - window_size_left, - window_size_right, - return_softmax, - gen_); -} + std::optional gen_); inline std::tuple< at::Tensor, diff --git a/cmake/External/aotriton.cmake b/cmake/External/aotriton.cmake index 5d91587746540..4f7a79a78bfc6 100644 --- a/cmake/External/aotriton.cmake +++ b/cmake/External/aotriton.cmake @@ -45,13 +45,88 @@ if(NOT __AOTRITON_INCLUDED) ) set(__AOTRITON_BASE_URL "https://github.com/ROCm/aotriton/releases/download/") # @lint-ignore set(__AOTRITON_Z "gz") + # Set the default __AOTRITON_LIB path + set(__AOTRITON_LIB "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so") + if(WIN32) + set(__AOTRITON_LIB "${__AOTRITON_INSTALL_DIR}/lib/aotriton_v2.lib") + endif() + + function(aotriton_build_windows_dependencies dlfcn-win32_external xz_external dlfcn-win32_DIR liblzma_DIR) + # Windows-specific dependencies - build these first + if(NOT noimage) + message(FATAL_ERROR "noimage must be ON for Windows builds") + endif() + # Build dlfcn-win32 + set(__DLFCN_WIN32_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/dlfcn-win32") + set(__DLFCN_WIN32_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/dlfcn-win32-install") + + ExternalProject_Add(${dlfcn-win32_external} + GIT_REPOSITORY https://github.com/dlfcn-win32/dlfcn-win32.git + GIT_TAG v1.4.2 + PREFIX ${__DLFCN_WIN32_PREFIX} + INSTALL_DIR ${__DLFCN_WIN32_INSTALL_DIR} + CMAKE_ARGS + -DCMAKE_INSTALL_PREFIX=${__DLFCN_WIN32_INSTALL_DIR} + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=cl + -DCMAKE_CXX_COMPILER=cl + -DBUILD_SHARED_LIBS=ON + -DBUILD_TESTS=OFF + BUILD_BYPRODUCTS + 
"${__DLFCN_WIN32_INSTALL_DIR}/lib/dl.lib" + "${__DLFCN_WIN32_INSTALL_DIR}/bin/dl.dll" + ) + ExternalProject_Add_Step(${dlfcn-win32_external} copy_to_aotriton + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${__DLFCN_WIN32_INSTALL_DIR}/bin/dl.dll" + "${__AOTRITON_INSTALL_DIR}/lib/" + DEPENDEES install + ) + set(${dlfcn-win32_DIR} "${__DLFCN_WIN32_INSTALL_DIR}/share/dlfcn-win32" CACHE PATH "Path to dlfcn-win32 CMake config" FORCE) + + # Build xz/liblzma + set(__XZ_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/xz") + set(__XZ_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/xz-install") + + ExternalProject_Add(${xz_external} + GIT_REPOSITORY https://github.com/tukaani-project/xz.git + GIT_TAG v5.8.1 + PREFIX ${__XZ_PREFIX} + INSTALL_DIR ${__XZ_INSTALL_DIR} + CMAKE_ARGS + -DCMAKE_INSTALL_PREFIX=${__XZ_INSTALL_DIR} + -DCMAKE_BUILD_TYPE=Release + -DBUILD_SHARED_LIBS=ON + -DENABLE_NLS=OFF + -DXZ_TOOL_LZMAINFO=OFF + -DXZ_TOOL_XZ=OFF + -DXZ_TOOL_XZDEC=OFF + -DXZ_TOOL_LZMADEC=OFF + BUILD_BYPRODUCTS + "${__XZ_INSTALL_DIR}/lib/lzma.lib" + "${__XZ_INSTALL_DIR}/bin/liblzma.dll" + ) + ExternalProject_Add_Step(${xz_external} copy_to_aotriton + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${__XZ_INSTALL_DIR}/bin/liblzma.dll" + "${__AOTRITON_INSTALL_DIR}/lib/" + DEPENDEES install + ) + set(${liblzma_DIR} "${__XZ_INSTALL_DIR}/lib/cmake/liblzma" CACHE PATH "Path to xz/liblzma CMake config" FORCE) + endfunction() + function(aotriton_build_from_source noimage project) if(noimage) SET(RECURSIVE "OFF") else() SET(RECURSIVE "ON") endif() + if(WIN32) + message(STATUS "Building AOTriton Windows dependencies") + aotriton_build_windows_dependencies(dlfcn-win32_external xz_external dlfcn-win32_DIR liblzma_DIR) + endif() message(STATUS "PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}") + ExternalProject_Add(${project} GIT_REPOSITORY https://github.com/ROCm/aotriton.git GIT_SUBMODULES_RECURSE ${RECURSIVE} @@ -65,12 +140,19 @@ if(NOT __AOTRITON_INCLUDED) -DAOTRITON_GPU_BUILD_TIMEOUT=0 -DAOTRITON_NO_PYTHON=ON -DAOTRITON_NOIMAGE_MODE=${noimage} - BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so" + -DHIP_PLATFORM=amd + $<$:-Ddlfcn-win32_DIR=${dlfcn-win32_DIR}> + $<$:-Dliblzma_DIR=${liblzma_DIR}> + BUILD_BYPRODUCTS + "${__AOTRITON_LIB}" USES_TERMINAL_DOWNLOAD TRUE USES_TERMINAL_CONFIGURE TRUE USES_TERMINAL_BUILD TRUE USES_TERMINAL_INSTALL TRUE ) + if(WIN32) + add_dependencies(${project} dlfcn-win32_external xz_external) + endif() endfunction() set(__AOTRITON_ARCH ${CMAKE_HOST_SYSTEM_PROCESSOR}) @@ -95,7 +177,7 @@ if(NOT __AOTRITON_INCLUDED) INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_BINARY_DIR}/aotriton_runtime" "${__AOTRITON_INSTALL_DIR}" - BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so" + BUILD_BYPRODUCTS "${__AOTRITON_LIB}" ) message(STATUS "Using AOTriton Runtime from pre-compiled binary ${__AOTRITON_URL}.\ Set env variables AOTRITON_INSTALL_FROM_SOURCE=1 to build from source.") @@ -111,14 +193,35 @@ if(NOT __AOTRITON_INCLUDED) string(CONCAT __AOTRITON_URL "${__AOTRITON_BASE_URL}" "${__AOTRITON_VER}/${__AOTRITON_FILE}") + + # Set up directories + set(__AOTRITON_DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_download-${image}) + set(__AOTRITON_EXTRACT_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image}) + set(__AOTRITON_INSTALL_SOURCE_DIR ${__AOTRITON_EXTRACT_DIR}) + set(__DOWNLOAD_NO_EXTRACT "") + set(__BUILD_COMMANDS "") + + # On Windows, we need custom tar extraction with UTF-8 support + if(WIN32) + set(__DOWNLOAD_NO_EXTRACT "DOWNLOAD_NO_EXTRACT;TRUE") + 
set(__BUILD_COMMANDS + COMMAND ${CMAKE_COMMAND} -E make_directory "${__AOTRITON_EXTRACT_DIR}" + COMMAND tar --options hdrcharset=UTF-8 -xf "${__AOTRITON_DOWNLOAD_DIR}/${__AOTRITON_FILE}" -C "${__AOTRITON_EXTRACT_DIR}" + ) + set(__AOTRITON_INSTALL_SOURCE_DIR ${__AOTRITON_EXTRACT_DIR}/aotriton) + endif() + ExternalProject_Add(${project} URL "${__AOTRITON_URL}" URL_HASH SHA256=${__AOTRITON_SHA256} - SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image} + DOWNLOAD_DIR ${__AOTRITON_DOWNLOAD_DIR} + ${__DOWNLOAD_NO_EXTRACT} + SOURCE_DIR ${__AOTRITON_EXTRACT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" + ${__BUILD_COMMANDS} INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory - "${CMAKE_CURRENT_BINARY_DIR}/aotriton_image-${image}" + "${__AOTRITON_INSTALL_SOURCE_DIR}" "${__AOTRITON_INSTALL_DIR}" BUILD_BYPRODUCTS "${__AOTRITON_INSTALL_DIR}/lib/aotriton.images/${image}/__signature__" @@ -164,7 +267,7 @@ if(NOT __AOTRITON_INCLUDED) endforeach() endforeach() endif() - target_link_libraries(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/lib/libaotriton_v2.so) + target_link_libraries(__caffe2_aotriton INTERFACE ${__AOTRITON_LIB}) target_include_directories(__caffe2_aotriton INTERFACE ${__AOTRITON_INSTALL_DIR}/include) set(AOTRITON_FOUND TRUE) endif() # __AOTRITON_INCLUDED diff --git a/tools/linter/dictionary.txt b/tools/linter/dictionary.txt index 706881a8f10f6..c4a250db04836 100644 --- a/tools/linter/dictionary.txt +++ b/tools/linter/dictionary.txt @@ -12,6 +12,7 @@ BU contiguities contiguity coo +DEPENDEES deser din dout From 9cd54d34438a64a16fc21aee989d4a6a3370915a Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 15 Sep 2025 16:14:40 +0000 Subject: [PATCH 250/693] Clean up 'torch.onnx' entries from public API allowlist (#162850) Clean up entries related to 'torch.onnx' from the allowlist as the apis in onnx are properly configured. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162850 Approved by: https://github.com/albanD --- test/allowlist_for_publicAPI.json | 51 ------------------------------- 1 file changed, 51 deletions(-) diff --git a/test/allowlist_for_publicAPI.json b/test/allowlist_for_publicAPI.json index 21335a3617b43..d01d41d37997e 100644 --- a/test/allowlist_for_publicAPI.json +++ b/test/allowlist_for_publicAPI.json @@ -658,13 +658,6 @@ "Iterable", "Optional" ], - "torch.onnx": [ - "Dict", - "OperatorExportTypes", - "Optional", - "TensorProtoDataType", - "TrainingMode" - ], "torch.overrides": [ "BaseTorchFunctionMode", "TorchFunctionMode", @@ -2207,21 +2200,6 @@ "Tuple", "abstractmethod" ], - "torch.onnx.verification": [ - "Any", - "Callable", - "Collection", - "Dict", - "FrozenSet", - "List", - "Mapping", - "Number", - "Optional", - "Sequence", - "Set", - "Tuple", - "Union" - ], "torch.quantization.fx": [ "convert", "fuse", @@ -2685,35 +2663,6 @@ "DeferredMtiaCallError", "StreamContext" ], - "torch.onnx.symbolic_helper": [ - "Any", - "Callable", - "List", - "Literal", - "NoReturn", - "Number", - "Optional", - "Sequence", - "Set", - "Tuple", - "Union" - ], - "torch.onnx.symbolic_opset18": [ - "amax", - "amin", - "aminmax", - "embedding_bag", - "linalg_vector_norm", - "max", - "maximum", - "min", - "minimum" - ], - "torch.onnx.symbolic_opset20": [ - "_affine_grid_generator", - "_grid_sampler", - "convert_grid_sample_mode" - ], "torch.utils.data.datapipes.dataframe.dataframe_wrapper": [ "Any", "Optional" From 19a4ef025673cba3d682b97661942992bc6ed261 Mon Sep 17 00:00:00 2001 From: fduwjj Date: Fri, 12 Sep 2025 13:49:30 -0700 Subject: [PATCH 251/693] [DeviceMesh] Make CuTe layout as mesh layout to be ready for using in DeviceMesh (#162414) We create a wrapper class named "_MeshLayout" acting as a layout for device mesh so that we can add new methods more specific to DeviceMesh and keep the core logic of CuTe manipulation inside pycute module. This PR create the main body of the code and then next PR will come with actual implementation and unit test for device mesh layout. 
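As a rough usage sketch (illustrative values only; the helpers simply delegate to the pycute primitives imported by the new module):

```python
from torch.distributed._mesh_layout import _MeshLayout

# A 2 x 4 mesh over 8 ranks, laid out row-major.
layout = _MeshLayout(shape=(2, 4), stride=(4, 1))
assert layout.numel() == 8

flat = layout.coalesce()      # merge adjacent modes where strides allow
rest = layout.complement(16)  # complete the layout w.r.t. a 16-rank world
```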
(Actual implementation can be found in https://github.com/pytorch/pytorch/pull/161016) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162414 Approved by: https://github.com/ezyang, https://github.com/fegin ghstack dependencies: #162413, #162534 --- torch/distributed/_mesh_layout.py | 71 +++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 torch/distributed/_mesh_layout.py diff --git a/torch/distributed/_mesh_layout.py b/torch/distributed/_mesh_layout.py new file mode 100644 index 0000000000000..86969fccc55d5 --- /dev/null +++ b/torch/distributed/_mesh_layout.py @@ -0,0 +1,71 @@ +""" +Definition of CuTe inspired Layouts for DeviceMesh internal bookkeeping and functions to manipulate them +""" + +import math +from collections.abc import Iterator +from dataclasses import dataclass + +from torch.distributed._pycute import ( + coalesce, + complement, + composition, + flatten, + IntTuple, + is_int, + is_tuple, + Layout, +) + + +@dataclass(frozen=True, init=True) +class _MeshLayout(Layout): + shape: IntTuple + stride: IntTuple + + def __post_init__(self) -> None: + if not is_tuple(self.shape) and not is_int(self.shape): + raise TypeError(f"shape must be a tuple or int, got {type(self.shape)}") + if not is_tuple(self.stride) and not is_int(self.stride): + raise TypeError(f"stride must be a tuple or int, got {type(self.stride)}") + if ( + is_tuple(self.shape) + and is_tuple(self.stride) + and len(flatten(self.shape)) != len(flatten(self.stride)) + ): + raise ValueError( + f"sizes {len(flatten(self.shape))} and " + f"strides {len(flatten(self.stride))} must have the same length" + ) + + @property + def sizes(self) -> IntTuple: + return self.shape + + @property + def strides(self) -> IntTuple: + return self.stride + + @property + def sizes_and_strides(self) -> Iterator[tuple[int, int]]: + return zip(flatten(self.shape), flatten(self.stride)) + + def numel(self) -> int: + return math.prod(flatten(self.shape)) + + # # operator [] (get-i like tuples) + def __getitem__(self, i: int) -> "_MeshLayout": + layout = super().__getitem__(i) + return _MeshLayout(layout.shape, layout.stride) + + def coalesce(self) -> "_MeshLayout": + layout = coalesce(self) + return _MeshLayout(layout.shape, layout.stride) + + def composition(self, layout: "_MeshLayout") -> "_MeshLayout": + result = composition(self, layout) + return _MeshLayout(result.shape, result.stride) + + def complement(self, world_size: int) -> "_MeshLayout": + layout = complement(self, world_size) + return _MeshLayout(layout.shape, layout.stride) From 6b231af23d63ee543a81c32952138090bebcf61d Mon Sep 17 00:00:00 2001 From: Catherine Lee Date: Mon, 15 Sep 2025 17:29:31 +0000 Subject: [PATCH 252/693] [lint][CI] Don't checkout submodules for lintrunner-noclang (#162844) Shouldn't be needed? 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162844 Approved by: https://github.com/huydhn --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b1a6dfb390711..b8b994ab5c60e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -105,7 +105,7 @@ jobs: # NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout # to run git rev-parse HEAD~:.ci/docker when a new image is needed fetch-depth: 0 - submodules: true + submodules: false ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | CHANGED_FILES="${{ needs.get-changed-files.outputs.changed-files }}" From b334a5a37991b19e61b16fe284a07a3f5f1db8e6 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Mon, 15 Sep 2025 18:04:39 +0000 Subject: [PATCH 253/693] [ROCm][benchmark] Add HF LLM benchmark expected accuracy (#162965) PR #156967 added HF LLM benchmarks but did not add the ci expected accuracy files for ROCm. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162965 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily --- .../rocm/aot_eager_huggingface_inference.csv | 20 +++++++++++++++++++ .../rocm/aot_eager_huggingface_training.csv | 20 +++++++++++++++++++ .../aot_inductor_huggingface_inference.csv | 20 +++++++++++++++++++ ...ynamic_aot_eager_huggingface_inference.csv | 20 +++++++++++++++++++ ...dynamic_aot_eager_huggingface_training.csv | 20 +++++++++++++++++++ ...dynamic_inductor_huggingface_inference.csv | 20 +++++++++++++++++++ .../dynamic_inductor_huggingface_training.csv | 20 +++++++++++++++++++ .../dynamo_eager_huggingface_inference.csv | 20 +++++++++++++++++++ .../rocm/inductor_huggingface_inference.csv | 20 +++++++++++++++++++ .../rocm/inductor_huggingface_training.csv | 20 +++++++++++++++++++ 10 files changed, 200 insertions(+) diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_huggingface_inference.csv index 0f088e7892d8f..b759310805957 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_huggingface_training.csv index 08061de428d71..7ec7c3c8482c9 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_huggingface_training.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,5 YituTechConvBert,pass,5 + + + +meta-llama/Llama-3.2-1B,eager_fail_to_run,0 + + + +google/gemma-2-2b,eager_fail_to_run,0 + + + +google/gemma-3-4b-it,eager_fail_to_run,0 + + + +openai/whisper-tiny,eager_fail_to_run,0 + + + +Qwen/Qwen3-0.6B,eager_fail_to_run,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_inductor_huggingface_inference.csv index ce334e22c698b..a49a27a8223d6 100644 --- 
a/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/aot_inductor_huggingface_inference.csv @@ -167,3 +167,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,fail_accuracy,0 + + + +google/gemma-2-2b,fail_accuracy,0 + + + +google/gemma-3-4b-it,fail_accuracy,0 + + + +openai/whisper-tiny,fail_to_run,0 + + + +Qwen/Qwen3-0.6B,fail_accuracy,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_huggingface_inference.csv index 0f088e7892d8f..b759310805957 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_huggingface_training.csv index 08061de428d71..7ec7c3c8482c9 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_huggingface_training.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,5 YituTechConvBert,pass,5 + + + +meta-llama/Llama-3.2-1B,eager_fail_to_run,0 + + + +google/gemma-2-2b,eager_fail_to_run,0 + + + +google/gemma-3-4b-it,eager_fail_to_run,0 + + + +openai/whisper-tiny,eager_fail_to_run,0 + + + +Qwen/Qwen3-0.6B,eager_fail_to_run,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_huggingface_inference.csv index 0f088e7892d8f..b5e1a0989e74a 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_huggingface_training.csv index f65909f3a24ea..37e1b792b3dc3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_inductor_huggingface_training.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,5 YituTechConvBert,pass,5 + + + +meta-llama/Llama-3.2-1B,eager_fail_to_run,0 + + + +google/gemma-2-2b,eager_fail_to_run,0 + + + +google/gemma-3-4b-it,eager_fail_to_run,0 + + + +openai/whisper-tiny,eager_fail_to_run,0 + + + +Qwen/Qwen3-0.6B,eager_fail_to_run,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_huggingface_inference.csv index 0f088e7892d8f..b759310805957 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_huggingface_inference.csv +++ 
b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_huggingface_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_huggingface_inference.csv index 0f088e7892d8f..b759310805957 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_huggingface_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_huggingface_inference.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,0 YituTechConvBert,pass,0 + + + +meta-llama/Llama-3.2-1B,pass,5 + + + +google/gemma-2-2b,pass,5 + + + +google/gemma-3-4b-it,pass_due_to_skip,0 + + + +openai/whisper-tiny,pass,6 + + + +Qwen/Qwen3-0.6B,pass,5 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_huggingface_training.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_huggingface_training.csv index f65909f3a24ea..37e1b792b3dc3 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_huggingface_training.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/inductor_huggingface_training.csv @@ -171,3 +171,23 @@ XLNetLMHeadModel,pass,5 YituTechConvBert,pass,5 + + + +meta-llama/Llama-3.2-1B,eager_fail_to_run,0 + + + +google/gemma-2-2b,eager_fail_to_run,0 + + + +google/gemma-3-4b-it,eager_fail_to_run,0 + + + +openai/whisper-tiny,eager_fail_to_run,0 + + + +Qwen/Qwen3-0.6B,eager_fail_to_run,0 From 8e05749d5cde0b57b6c3fa805a8d201f0c619079 Mon Sep 17 00:00:00 2001 From: PenXLa <921417536@qq.com> Date: Mon, 15 Sep 2025 18:07:16 +0000 Subject: [PATCH 254/693] Fix integer overflow bug in triu/tril for large diagonal values (#153240) This PR fixes a bug in the implementation of `apply_triu_tril_single` where using extremely large values for the diagonal argument (e.g. `diagonal=9223372036854775807`) could result in integer overflow and incorrect results. The masking logic is re-written to avoid this issue by always iterating over all columns, ensuring correctness even for large or extreme diagonal values. Example of the original incorrect behavior: ```python a = torch.ones(5,5) torch.triu(a, 9223372036854775807) # Before: # tensor([[0., 0., 0., 0., 0.], # [1., 1., 1., 1., 1.], # [1., 1., 1., 1., 1.], # [1., 1., 1., 1., 1.], # [1., 1., 1., 1., 1.]]) ``` The new implementation guards against overflow and produces correct results for all valid input values. Pull Request resolved: https://github.com/pytorch/pytorch/pull/153240 Approved by: https://github.com/albanD --- aten/src/ATen/native/TriangularOps.cpp | 1 + test/test_linalg.py | 33 ++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/aten/src/ATen/native/TriangularOps.cpp b/aten/src/ATen/native/TriangularOps.cpp index 47264c45205c0..08b666e296ed7 100644 --- a/aten/src/ATen/native/TriangularOps.cpp +++ b/aten/src/ATen/native/TriangularOps.cpp @@ -52,6 +52,7 @@ void apply_triu_tril_single( int64_t self_col_stride, bool upper) { constexpr int64_t zero = 0; + k = std::clamp(k, -n, m); // Clamp k to [-n, m] to prevent i + k arithmetic overflow, especially if k approaches INT64_MAX/INT64_MIN. 
if (upper) { parallel_for(0, n, 0, [&](int64_t start, int64_t end) { diff --git a/test/test_linalg.py b/test/test_linalg.py index 0f6c8f207421b..ffae8ac18da22 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -9840,6 +9840,39 @@ def test_matmul_mv(self, device, dtype): C = torch.matmul(A, B) self.assertEqual(C, B.sum().expand(B.shape)) + @dtypes(*all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16)) + def test_triu_tril_extreme_k_values(self, device, dtype): + """ + Test triu/tril with extreme k values to verify overflow fix. + Regression test for https://github.com/pytorch/pytorch/pull/153240 + """ + # Create test matrices + a = make_tensor((5, 5), dtype=dtype, device=device) + + # Test extreme positive k value + k_max = 9223372036854775807 + result_triu_max = torch.triu(a, k_max) + result_tril_max = torch.tril(a, k_max) + + # With k = INT64_MAX, triu should return all zeros (since i + k will exceed matrix bounds for all i,j) + # and tril should return the full matrix (since i + k + 1 will exceed matrix bounds for all i,j) + expected_triu_max = torch.zeros_like(a) + expected_tril_max = a.clone() + self.assertEqual(result_triu_max, expected_triu_max) + self.assertEqual(result_tril_max, expected_tril_max) + + # Test extreme negative k value + k_min = -9223372036854775808 + result_triu_min = torch.triu(a, k_min) + result_tril_min = torch.tril(a, k_min) + + # With k = INT64_MIN, triu should return the full matrix (since i + k will be negative for all i,j) + # and tril should return all zeros (since i + k + 1 will be negative for all i,j) + expected_triu_min = a.clone() + expected_tril_min = torch.zeros_like(a) + self.assertEqual(result_triu_min, expected_triu_min) + self.assertEqual(result_tril_min, expected_tril_min) + @dtypes(torch.float, torch.double) @precisionOverride({torch.float32: 1e-4}) def test_1_sized_with_0_strided(self, device, dtype): From fa919feab6a55ea9104e4ce61d38c3725f5728e6 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Mon, 15 Sep 2025 18:43:53 +0000 Subject: [PATCH 255/693] Revert "[lint][CI] Don't checkout submodules for lintrunner-noclang (#162844)" This reverts commit 6b231af23d63ee543a81c32952138090bebcf61d. Reverted https://github.com/pytorch/pytorch/pull/162844 on behalf of https://github.com/wdvr due to seems to be needed after all - failing lint ([comment](https://github.com/pytorch/pytorch/pull/162844#issuecomment-3293465058)) --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b8b994ab5c60e..b1a6dfb390711 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -105,7 +105,7 @@ jobs: # NB: A shallow checkout won't work here because calculate-docker-image requires a full checkout # to run git rev-parse HEAD~:.ci/docker when a new image is needed fetch-depth: 0 - submodules: false + submodules: true ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | CHANGED_FILES="${{ needs.get-changed-files.outputs.changed-files }}" From de3a863cd869599fb6aebb63a13c5100a2803a58 Mon Sep 17 00:00:00 2001 From: Arijit Mukhopadhyay Date: Mon, 15 Sep 2025 19:29:35 +0000 Subject: [PATCH 256/693] AMD CPU CI - Add freezing + fix label trigger (#162176) Added the following changes: 1. Added freezing by default for AMD CPU based CI (to follow pattern introduced by https://github.com/pytorch/pytorch/pull/152298 ) 2. 
Fixed issue with label based CI triggers Addresses code review comment in https://github.com/pytorch/pytorch/pull/161155 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162176 Approved by: https://github.com/malfet, https://github.com/jeffdaily --- .../workflows/inductor-perf-test-nightly-x86-zen.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml index a9a839df61af2..a7110b0fd9328 100644 --- a/.github/workflows/inductor-perf-test-nightly-x86-zen.yml +++ b/.github/workflows/inductor-perf-test-nightly-x86-zen.yml @@ -43,6 +43,11 @@ on: required: false type: boolean default: false + freezing: + description: Run freezing? + required: false + type: boolean + default: true benchmark_configs: description: The list of configs used the benchmark required: false @@ -102,7 +107,7 @@ jobs: if: github.event.schedule == '0 7 * * *' with: build-environment: linux-jammy-py3.10-gcc11-build - dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true + dashboard-tag: training-false-inference-true-default-true-dynamic-true-cppwrapper-true-aotinductor-true-freezing-true docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} timeout-minutes: 720 @@ -116,10 +121,9 @@ jobs: name: inductor-test uses: ./.github/workflows/_linux-test.yml needs: inductor-build - if: github.event_name == 'workflow_dispatch' with: build-environment: linux-jammy-py3.10-gcc11-build - dashboard-tag: training-${{ inputs.training }}-inference-${{ inputs.inference }}-default-${{ inputs.default }}-dynamic-${{ inputs.dynamic }}-cppwrapper-${{ inputs.cppwrapper }}-aotinductor-${{ inputs.aotinductor }} + dashboard-tag: training-${{ inputs.training || 'false' }}-inference-${{ inputs.inference || 'true' }}-default-${{ inputs.default || 'true' }}-dynamic-${{ inputs.dynamic || 'true' }}-cppwrapper-${{ inputs.cppwrapper || 'true' }}-aotinductor-${{ inputs.aotinductor || 'true' }}-freezing-${{ inputs.freezing || 'true' }} docker-image: ${{ needs.inductor-build.outputs.docker-image }} test-matrix: ${{ needs.inductor-build.outputs.test-matrix }} timeout-minutes: 720 From 1247dde1f254bca6f3c73a45500baec5c1c0c08a Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Mon, 15 Sep 2025 10:40:28 -0400 Subject: [PATCH 257/693] [BE] Improve pytest summary display for OpInfo tests (#162961) pytest summarizes test failures by printing a truncated first line of the test of the OUTERMOST wrapped exception. Prior to this PR, it looked like this: ``` FAILED [0.0454s] test/distributed/tensor/test_dtensor_ops.py::TestLocalDTensorOpsCPU::test_dtensor_op_db_H_cpu_float32 - Exception: Caused by sample input at index 0: SampleInput(input=Tensor[size=(12, 12), device="cpu", dtype=torch.float32], args=(), kwargs={}, ... ``` I argue this is not so useful. If I have a lot of test failures, I look to the test summary to understand what /kind/ of errors I have, so I can assess which ones I should look at first. In other words, this is better: ``` FAILED [0.1387s] test/distributed/tensor/test_dtensor_ops.py::TestLocalDTensorOpsCPU::test_dtensor_op_db__softmax_backward_data_cpu_float32 - Exception: Tensor-likes are not close! ``` Now I know specifically this is a numerics problem! This PR does it by prepending the old exception text to the wrapped exception. 
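Roughly, the wrapping pattern is (a minimal sketch; `run_checked` is a made-up name, the real call sites are in the diff below):

```python
def run_checked(func, *args):
    try:
        return func(*args)
    except Exception as e:
        # Prepend the original message so pytest's one-line summary surfaces
        # the root cause (e.g. "Tensor-likes are not close!") rather than only
        # the generic wrapper text.
        raise RuntimeError(f"{str(e)}\n\nfailed to run: {func}") from e
```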
This is slightly redundant, as we are exception chaining, but it does the job. Open to bikeshedding. Signed-off-by: Edward Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/162961 Approved by: https://github.com/malfet --- test/distributed/tensor/test_dtensor_ops.py | 2 +- torch/testing/_internal/common_device_type.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/distributed/tensor/test_dtensor_ops.py b/test/distributed/tensor/test_dtensor_ops.py index 6f981aee82ef0..864245c80c023 100644 --- a/test/distributed/tensor/test_dtensor_ops.py +++ b/test/distributed/tensor/test_dtensor_ops.py @@ -636,7 +636,7 @@ def to_replicate(e: object) -> object: ) except Exception as e: raise RuntimeError( - f"failed to run: {resolve_name(func)}, with (*{dtensor_args}, **{dtensor_kwargs})" + f"{str(e)}\n\nfailed to run: {resolve_name(func)}, with (*{dtensor_args}, **{dtensor_kwargs})" ) from e return rs diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 8971eca1bb24e..43c7741c69aab 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -1139,7 +1139,7 @@ def test_wrapper(*args, **kwargs): tracked_input = get_tracked_input() if PRINT_REPRO_ON_FAILURE and tracked_input is not None: e_tracked = Exception( # noqa: TRY002 - f"Caused by {tracked_input.type_desc} " + f"{str(e)}\n\nCaused by {tracked_input.type_desc} " f"at index {tracked_input.index}: " f"{_serialize_sample(tracked_input.val)}" ) From 01c3c891c1962ec1eaa7378fb4f56594a8953d55 Mon Sep 17 00:00:00 2001 From: Jagadish Krishnamoorthy Date: Mon, 15 Sep 2025 20:23:43 +0000 Subject: [PATCH 258/693] [ROCm] Enable test_fixed_striding (#162787) Enable the distributed test test_fixed_striding on gfx arch which supports fp8. 
Test command: python test/distributed/test_c10d_functional_native.py -k test_fixed_striding Pull Request resolved: https://github.com/pytorch/pytorch/pull/162787 Approved by: https://github.com/pruthvistony, https://github.com/jeffdaily --- test/distributed/test_c10d_functional_native.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/distributed/test_c10d_functional_native.py b/test/distributed/test_c10d_functional_native.py index bafc781b591c6..42d4514eace2f 100644 --- a/test/distributed/test_c10d_functional_native.py +++ b/test/distributed/test_c10d_functional_native.py @@ -21,7 +21,8 @@ reduce_scatter_tensor, reduce_scatter_tensor_coalesced, ) -from torch.testing._internal.common_cuda import SM90OrLater +from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FP8 +from torch.testing._internal.common_device_type import e4m3_type from torch.testing._internal.common_distributed import ( MultiProcessTestCase, requires_nccl, @@ -29,7 +30,6 @@ ) from torch.testing._internal.common_utils import ( # type: ignore[attr-defined] run_tests, - skipIfRocm, TestCase, ) from torch.testing._internal.distributed.fake_pg import FakeStore @@ -501,10 +501,9 @@ def join(self): t.start() t.join() - @skipIfRocm @unittest.skipIf( - not SM90OrLater, - "_scaled_mm currently only supports sm>=90", + not PLATFORM_SUPPORTS_FP8, + "_scaled_mm currently only supports sm>=90 on cuda and gfx94/95 on ROCm", ) @skip_if_lt_x_gpu(2) @fresh_cache() @@ -513,10 +512,9 @@ def test_fixed_striding(self): def scale(t): scale = ( - torch.finfo(torch.float8_e4m3fn).max - / t.abs().amax(dim=-1, keepdim=True).float() + torch.finfo(e4m3_type).max / t.abs().amax(dim=-1, keepdim=True).float() ) - t = t.mul(scale).to(torch.float8_e4m3fn) + t = t.mul(scale).to(e4m3_type) return t, scale def fp8_rowwise_backward(in_, w, out_grad): From dae5beae8e09f823af917f1f94e3e2ebf519a193 Mon Sep 17 00:00:00 2001 From: Shivam Raikundalia Date: Mon, 15 Sep 2025 21:01:47 +0000 Subject: [PATCH 259/693] [RecordFunction] Add Scope for Record Function Fast (#162661) Differential Revision: D82164587 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162661 Approved by: https://github.com/davidberard98 --- test/profiler/test_profiler.py | 40 ++++++++++++++++------------- torch/csrc/profiler/python/init.cpp | 14 ++++++++-- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/test/profiler/test_profiler.py b/test/profiler/test_profiler.py index 46b21cb4dc097..28d337300a12f 100644 --- a/test/profiler/test_profiler.py +++ b/test/profiler/test_profiler.py @@ -1764,25 +1764,27 @@ def test_profiler_op_event_kwargs(self): with open(fname) as f: j = json.load(f) op_events = [ - e for e in j["traceEvents"] if e.get("cat", "") == "cpu_op" + e + for e in j["traceEvents"] + if e.get("name", "") == "add_test_kwinputs" ] + self.assertTrue(len(op_events) > 0) for e in op_events: - if e["name"] == "add_test_kwinputs": - # print(e["args"]) - args = e["args"] - self.assertTrue("stream" in args) - self.assertTrue("grid" in args) - self.assertTrue("boolean" in args) - self.assertTrue(args["stream"] == 0) - self.assertTrue(args["grid"] == "lambda x : x + 1") - self.assertTrue(args["debug"] == "None") - self.assertTrue(args["boolean"]) + args = e["args"] + self.assertTrue("stream" in args) + self.assertTrue("grid" in args) + self.assertTrue("boolean" in args) + self.assertTrue(args["stream"] == 0) + self.assertTrue(args["grid"] == "lambda x : x + 1") + self.assertTrue(args["debug"] == "None") + 
self.assertTrue(args["boolean"]) + self.assertTrue(e["cat"] == "cpu_op") with profile(record_shapes=True) as p1: cm = torch._C._profiler._RecordFunctionFast( "add_test_kwinputs", [x, y], - {"stream": "test", "grid": [1, 2]}, + {"stream": "test", "grid": [1, 2], "scope": "user_scope"}, ) for _ in range(4): with cm: @@ -1792,14 +1794,16 @@ def test_profiler_op_event_kwargs(self): with open(fname1) as f1: j = json.load(f1) op_events = [ - e for e in j["traceEvents"] if e.get("cat", "") == "cpu_op" + e + for e in j["traceEvents"] + if e.get("name", "") == "add_test_kwinputs" ] + self.assertTrue(len(op_events) > 0) for e in op_events: - if e["name"] == "add_test_kwinputs": - # print(e["args"]) - args = e["args"] - self.assertTrue("stream" not in args) - self.assertTrue("grid" not in args) + args = e["args"] + self.assertTrue("stream" not in args) + self.assertTrue("grid" not in args) + self.assertTrue(e["cat"] == "user_annotation") def test_is_profiler_enabled(self): self.assertFalse(torch.autograd.profiler._is_profiler_enabled) diff --git a/torch/csrc/profiler/python/init.cpp b/torch/csrc/profiler/python/init.cpp index aa7abe9433fe1..7c84172704264 100644 --- a/torch/csrc/profiler/python/init.cpp +++ b/torch/csrc/profiler/python/init.cpp @@ -220,8 +220,7 @@ PyObject* RecordFunctionFast_enter(PyObject* selfGeneric, PyObject* unused) { TORCH_INTERNAL_ASSERT( !self->guard, "Trying to enter a new record_function_fast context but the guard is unexpectedly already set"); - self->guard = - std::make_unique(at::RecordScope::FUNCTION); + auto scope = at::RecordScope::FUNCTION; std::vector args; std::unordered_map kwargs; bool profiler_need_input = torch::autograd::profiler::profilerEnabled() && @@ -262,6 +261,17 @@ PyObject* RecordFunctionFast_enter(PyObject* selfGeneric, PyObject* unused) { kwargs[key_str] = ivalue; } } + auto it = kwargs.find("scope"); + if (it != kwargs.end()) { + auto value = it->second; + if (value.isString()) { + auto value_str = value.toStringRef(); + if (value_str == "user_scope") { + scope = at::RecordScope::USER_SCOPE; + } + } + } + self->guard = std::make_unique(scope); self->guard->before(THPUtils_unpackString(self->name), &args, &kwargs); } Py_RETURN_NONE; From 8590c3a66b4adb77b59645dd12bf44695ac3b994 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Mon, 15 Sep 2025 21:14:06 +0000 Subject: [PATCH 260/693] [DTensor] Add _foreach_pow to sharding propagation list. (#162895) Fixes #152696 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162895 Approved by: https://github.com/ezyang --- torch/distributed/tensor/_ops/_pointwise_ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch/distributed/tensor/_ops/_pointwise_ops.py b/torch/distributed/tensor/_ops/_pointwise_ops.py index 42964ff748972..084fa62706e0d 100644 --- a/torch/distributed/tensor/_ops/_pointwise_ops.py +++ b/torch/distributed/tensor/_ops/_pointwise_ops.py @@ -660,6 +660,8 @@ def common_pointwise_strategy( aten._foreach_mul_.ScalarList, aten._foreach_mul_.Tensor, aten._foreach_mul_.List, + aten._foreach_pow.List, + aten._foreach_pow.ScalarList, aten._foreach_neg.default, aten._foreach_neg_.default, aten._foreach_reciprocal_.default, From 0def79fdd9c0998c9309b058748915350d2b7366 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Mon, 15 Sep 2025 22:49:29 +0000 Subject: [PATCH 261/693] [ROCm] fix conv relu fusion (#162856) Fixes #162816. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162856 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily --- aten/src/ATen/native/miopen/Conv_miopen.cpp | 266 ++++++-------------- test/nn/test_convolution.py | 2 + 2 files changed, 80 insertions(+), 188 deletions(-) diff --git a/aten/src/ATen/native/miopen/Conv_miopen.cpp b/aten/src/ATen/native/miopen/Conv_miopen.cpp index 41226680c4b58..328daffa40861 100644 --- a/aten/src/ATen/native/miopen/Conv_miopen.cpp +++ b/aten/src/ATen/native/miopen/Conv_miopen.cpp @@ -1770,10 +1770,12 @@ std::tuple miopen_depthwise_convolution_back // fusions // --------------------------------------------------------------------- -void raw_miopen_convolution_relu_out( +void raw_miopen_convolution_add_relu_out( const Tensor& output, const Tensor& input, const Tensor& weight, + const Tensor& z, + float alpha, const Tensor& bias, IntArrayRef stride, IntArrayRef padding, @@ -1781,68 +1783,20 @@ void raw_miopen_convolution_relu_out( int64_t groups, bool benchmark, bool deterministic) { - auto dataType = getMiopenDataType(input); - miopenConvolutionMode_t c_mode = miopenConvolution; - ConvolutionArgs args{ input, output, weight }; - args.handle = getMiopenHandle(); - at::MemoryFormat memory_format = miopen_conv_suggest_memory_format(input, weight); - setConvolutionParams( - &args.params, - args.handle, + raw_miopen_convolution_forward_out( + output, input, weight, padding, stride, dilation, groups, - deterministic, - memory_format); - args.idesc.set(input, memory_format); - args.wdesc.set(weight, memory_format, 0); - args.odesc.set(output, memory_format); - args.cdesc.set( - dataType, - c_mode, - input.dim() - 2, - args.params.padding, - args.params.stride, - args.params.dilation, - args.params.groups, benchmark, deterministic); - - TensorDescriptor bdesc; - bdesc.set(bias.expand({1, bias.size(0)}), output.dim()); - - // Create the fusion plan - miopenFusionPlanDescriptor_t fusePlanDesc; - miopenFusionOpDescriptor_t convoOp; - miopenFusionOpDescriptor_t biasOp; - miopenFusionOpDescriptor_t activOp; - MIOPEN_CHECK(miopenCreateFusionPlan(&fusePlanDesc, miopenVerticalFusion, args.idesc.desc())); - MIOPEN_CHECK(miopenCreateOpConvForward(fusePlanDesc, &convoOp, args.cdesc.desc(), args.wdesc.desc())); - MIOPEN_CHECK(miopenCreateOpBiasForward(fusePlanDesc, &biasOp, bdesc.desc())); - MIOPEN_CHECK(miopenCreateOpActivationForward(fusePlanDesc, &activOp, miopenActivationRELU)); - - // compile fusion plan - MIOPEN_CHECK(miopenCompileFusionPlan(args.handle, fusePlanDesc)); - - // Set the Args - float alpha = static_cast(1); - float beta = static_cast(0); - float activ_alpha = static_cast(0); - float activ_beta = static_cast(0); - float activ_gamma = static_cast(0); - miopenOperatorArgs_t fusionArgs; - MIOPEN_CHECK(miopenCreateOperatorArgs(&fusionArgs)); - MIOPEN_CHECK(miopenSetOpArgsConvForward(fusionArgs, convoOp, &alpha, &beta, weight.const_data_ptr())); - MIOPEN_CHECK(miopenSetOpArgsBiasForward(fusionArgs, biasOp, &alpha, &beta, bias.const_data_ptr())); - MIOPEN_CHECK(miopenSetOpArgsActivForward(fusionArgs, activOp, &alpha, &beta, activ_alpha, activ_beta, activ_gamma)); - - miopenExecuteFusionPlan(args.handle, fusePlanDesc, args.idesc.desc(), input.const_data_ptr(), args.odesc.desc(), output.data_ptr(), fusionArgs); - - // Cleanup - miopenDestroyFusionPlan(fusePlanDesc); + at::Tensor alpha_mul_z_add_bias = + at::native::reshape_bias(input.dim(), bias).add(z, alpha); + output.add_(alpha_mul_z_add_bias); + output.relu_(); } static at::Tensor 
self_or_new_memory_format(at::Tensor& self, at::MemoryFormat memory_format) { @@ -1855,171 +1809,107 @@ static at::Tensor self_or_new_memory_format(at::Tensor& self, at::MemoryFormat m Tensor miopen_convolution_add_relu( const Tensor& input_t, const Tensor& weight_t, - const Tensor& z, + const Tensor& z_t, const std::optional& alpha, - const std::optional& bias, + const std::optional& bias_t, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, int64_t groups) { - - // MIOpen does not support fusion of add, the alpha2 * z step of the below cuDNN function: - // y = act ( alpha1 * conv(x) + alpha2 * z + bias ) - auto memory_format = miopen_conv_suggest_memory_format(input_t, weight_t); + const Tensor input = input_t.contiguous(memory_format); + const Tensor weight = weight_t.contiguous(memory_format); + Tensor z = z_t; + if (z.suggest_memory_format() != memory_format) { + z = z.to(memory_format); + } + z = z.contiguous(memory_format); - auto& ctx = at::globalContext(); - bool benchmark = ctx.benchmarkCuDNN(); - - TensorArg input { input_t, "input", 1 }, - weight { weight_t, "weight", 2 }; - + // FuseFrozenConvAddRelu performs some tensor shape checking Tensor output_t = at::detail::empty_cuda( conv_output_size( - input_t.sizes(), weight_t.sizes(), padding, stride, dilation), - input_t.options().memory_format(memory_format)); - if (output_t.numel() == 0){ + input.sizes(), weight.sizes(), padding, stride, dilation), + input.options().memory_format(memory_format)); + if (output_t.numel() == 0) { return output_t; } - // Avoid ambiguity of "output" when this is being used as backwards - TensorArg output{output_t, "result", 0}; - miopen_convolution_forward_out( - output, - "miopen_convolution_add_relu", + + auto& ctx = at::globalContext(); + bool benchmark = ctx.benchmarkCuDNN(); + auto _alpha = alpha.has_value() ? alpha.value().to() : 1.0; + auto _bias = bias_t.has_value() + ? bias_t.value() + : at::zeros( + {output_t.size(1)}, + optTypeMetaToScalarType(output_t.options().dtype_opt()), + output_t.options().layout_opt(), + output_t.options().device_opt(), + output_t.options().pinned_memory_opt()); + + raw_miopen_convolution_add_relu_out( + output_t, input, weight, - padding, + z, + _alpha, + _bias, stride, + padding, dilation, groups, benchmark, - false // deterministic - ); - - auto contig_output_t = self_or_new_memory_format(output_t, memory_format); + true); // deterministic - if (!output_t.is_same(contig_output_t)) { - contig_output_t.copy_(output_t); - } - - auto _alpha = alpha.has_value() ? alpha.value().to() : 1.0; - auto _bias = bias.has_value() - ? 
bias.value() - : at::zeros( - {contig_output_t.size(1)}, - optTypeMetaToScalarType(contig_output_t.options().dtype_opt()), - contig_output_t.options().layout_opt(), - contig_output_t.options().device_opt(), - contig_output_t.options().pinned_memory_opt()); - - at::Tensor alpha_mul_z_add_bias = at::native::reshape_bias(input_t.dim(), _bias).add(z, _alpha); - contig_output_t.add_(alpha_mul_z_add_bias); - contig_output_t.relu_(); - - return contig_output_t; + return output_t; } Tensor miopen_convolution_relu( const Tensor& input_t, const Tensor& weight_t, - const std::optional& bias, + const std::optional& bias_t, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, int64_t groups) { + auto memory_format = miopen_conv_suggest_memory_format(input_t, weight_t); + const Tensor input = input_t.contiguous(memory_format); + const Tensor weight = weight_t.contiguous(memory_format); - auto& ctx = at::globalContext(); - bool benchmark = ctx.benchmarkCuDNN(); - - // MIOpen currently only supports MemoryFormat::Contiguous and fp32 and 2d - if (input_t.suggest_memory_format() == at::MemoryFormat::Contiguous - && input_t.scalar_type() == at::kFloat - && input_t.ndimension() == 4) { - - // FuseFrozenConvAddRelu performs some tensor shape checking - Tensor output_t = at::detail::empty_cuda( - conv_output_size( - input_t.sizes(), weight_t.sizes(), padding, stride, dilation), - input_t.options().memory_format(input_t.suggest_memory_format())); - if (output_t.numel() == 0) { - return output_t; - } - - auto _bias = bias.has_value() - ? bias.value() - : at::zeros( - {output_t.size(1)}, - optTypeMetaToScalarType(output_t.options().dtype_opt()), - output_t.options().layout_opt(), - output_t.options().device_opt(), - output_t.options().pinned_memory_opt()); - - raw_miopen_convolution_relu_out( - output_t, - input_t, - weight_t, - _bias, - stride, - padding, - dilation, - groups, - benchmark, // benchmark - false // deterministic - ); - + // FuseFrozenConvAddRelu performs some tensor shape checking + Tensor output_t = at::detail::empty_cuda( + conv_output_size( + input.sizes(), weight.sizes(), padding, stride, dilation), + input.options().memory_format(memory_format)); + if (output_t.numel() == 0) { return output_t; } - else { - // fallback - - auto memory_format = miopen_conv_suggest_memory_format(input_t, weight_t); - - TensorArg input { input_t, "input", 1 }, - weight { weight_t, "weight", 2 }; - Tensor output_t = at::detail::empty_cuda( - conv_output_size( - input_t.sizes(), weight_t.sizes(), padding, stride, dilation), - input->options().memory_format(memory_format)); - if (output_t.numel() == 0){ - return output_t; - } - // Avoid ambiguity of "output" when this is being used as backwards - TensorArg output{output_t, "result", 0}; - miopen_convolution_forward_out( - output, - "miopen_convolution_relu", - input, - weight, - padding, - stride, - dilation, - groups, - benchmark, - false // deterministic - ); - - auto contig_output_t = self_or_new_memory_format(output_t, memory_format); - - if (!output_t.is_same(contig_output_t)) { - contig_output_t.copy_(output_t); - } - - auto _bias = bias.has_value() - ? 
bias.value() - : at::zeros( - {contig_output_t.size(1)}, - optTypeMetaToScalarType(contig_output_t.options().dtype_opt()), - contig_output_t.options().layout_opt(), - contig_output_t.options().device_opt(), - contig_output_t.options().pinned_memory_opt()); - - at::Tensor reshaped_bias = at::native::reshape_bias(input_t.dim(), _bias); - contig_output_t.add_(reshaped_bias); - contig_output_t.relu_(); + auto& ctx = at::globalContext(); + bool benchmark = ctx.benchmarkCuDNN(); + auto _bias = bias_t.has_value() + ? bias_t.value() + : at::zeros( + {output_t.size(1)}, + optTypeMetaToScalarType(output_t.options().dtype_opt()), + output_t.options().layout_opt(), + output_t.options().device_opt(), + output_t.options().pinned_memory_opt()); + + raw_miopen_convolution_add_relu_out( + output_t, + input, + weight, + output_t, // use output_t as z to satisfy MIOpen API + 0, // alpha + _bias, + stride, + padding, + dilation, + groups, + benchmark, // benchmark + true); // deterministic - return contig_output_t; - } + return output_t; } REGISTER_CUDA_DISPATCH(miopen_convolution_backward_stub, &miopen_convolution_backward) diff --git a/test/nn/test_convolution.py b/test/nn/test_convolution.py index 81a9cf1ae5bea..c04a6785b50fd 100644 --- a/test/nn/test_convolution.py +++ b/test/nn/test_convolution.py @@ -3865,6 +3865,7 @@ def test_conv2d_no_grad(self, device, dtype): @onlyCUDA @skipCUDAIfNoCudnn @dtypes(torch.float, torch.float16) + @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False) @precisionOverride({torch.half: 0.002, torch.float: 1e-4}) def test_cudnn_convolution_relu(self, device, dtype): for batch, groups, image_size, kernel_size, memory_format in product( @@ -3898,6 +3899,7 @@ def test_cudnn_convolution_relu(self, device, dtype): @onlyCUDA @skipCUDAIfNoCudnn @dtypes(torch.float, torch.float16) + @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False) @precisionOverride({torch.half: 0.002, torch.float: 1e-4}) def test_cudnn_convolution_add_relu(self, device, dtype): for batch, groups, image_size, kernel_size, memory_format in product( From cf7873ea8bb8203a1ffa9dcea9c0b4f4f3f90224 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Mon, 15 Sep 2025 20:08:46 +0000 Subject: [PATCH 262/693] Placement: make is_shard/is_replicate/is_partial more straightforward (#162619) We already have method dispatch based on actual type, so just provide appropriate base class and subclass method implementations. (This is not motivated by any particular performance profiling, just seems more straightforward to me. 
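Reduced to its essence, the pattern is plain virtual dispatch (illustrative sketch, not the full classes):

```python
class Placement:
    def is_replicate(self) -> bool:
        return False

class Replicate(Placement):
    def is_replicate(self) -> bool:
        return True

assert Replicate().is_replicate()
assert not Placement().is_replicate()
```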
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162619 Approved by: https://github.com/ezyang, https://github.com/tianyu-l, https://github.com/zpcore --- torch/distributed/tensor/placement_types.py | 26 ++++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/torch/distributed/tensor/placement_types.py b/torch/distributed/tensor/placement_types.py index b37d49bd30744..a8538713a3812 100644 --- a/torch/distributed/tensor/placement_types.py +++ b/torch/distributed/tensor/placement_types.py @@ -32,19 +32,13 @@ class Placement: # convenient utils to check for placement types def is_shard(self, dim: Optional[int] = None) -> bool: - is_shard_instance = isinstance(self, Shard) - if dim is not None and is_shard_instance: - return cast(Shard, self).dim == dim - else: - return is_shard_instance + return False def is_replicate(self) -> bool: - return isinstance(self, Replicate) + return False def is_partial(self, reduce_op: Optional[str] = None) -> bool: - if reduce_op is None: - return isinstance(self, Partial) - return isinstance(self, Partial) and self.reduce_op == reduce_op + return False @dataclass(frozen=True) @@ -68,6 +62,12 @@ class Shard(Placement): dim: int + def is_shard(self, dim: Optional[int] = None) -> bool: + if dim is not None: + return self.dim == dim + else: + return True + def _split_tensor( self, tensor: torch.Tensor, @@ -650,6 +650,9 @@ def _replicate_tensor( mesh_broadcast(tensor, mesh, mesh_dim=mesh_dim, group_src=src_data_rank) return tensor + def is_replicate(self) -> bool: + return True + @dataclass(frozen=True) class Partial(Placement): @@ -729,6 +732,11 @@ def __str__(self) -> str: """ return "P" + def is_partial(self, reduce_op: Optional[str] = None) -> bool: + if reduce_op is None: + return True + return self.reduce_op == reduce_op + # We keep the old _Partial name for a while for BC reason _Partial = Partial From 090e6838a0b3518e838737cc56d6c77d8290fe58 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Mon, 15 Sep 2025 22:55:36 +0000 Subject: [PATCH 263/693] compile_kernel enable pch (#162972) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enabling automatic pre compiled headers per https://docs.nvidia.com/cuda/nvrtc/index.html#example-automatic-pch-cuda-12-8 I'm seeing large speedups in compilation times using PCH on average but the max compilation time with PCH is worst which is why I can't enable it by default. `load_inline()` also supports precompiled headers and does not enable them by default ``` Without PCH: 270.58 ms average With PCH: 115.27 ms average ``` ``` Without PCH: Max: 337.99 ms With PCH: Max: 383.82 ms ``` ```python source) [marksaroufim@devgpu005]~/pytorch% python simple_pch_benchmark.py ============================================================ Simple PCH Compilation Benchmark ============================================================ Device: NVIDIA B200 Iterations: 100 Testing WITHOUT PCH: ------------------------------ Compiling kernel 100 times WITHOUT PCH... Completed 10/100 compilations Completed 20/100 compilations Completed 30/100 compilations Completed 40/100 compilations Completed 50/100 compilations Completed 60/100 compilations Completed 70/100 compilations Completed 80/100 compilations Completed 90/100 compilations Completed 100/100 compilations Average: 270.58 ms (±6.99 ms) Min: 264.09 ms Max: 337.99 ms Testing WITH PCH: ------------------------------ Compiling kernel 100 times WITH PCH... 
Completed 10/100 compilations Completed 20/100 compilations Completed 30/100 compilations Completed 40/100 compilations Completed 50/100 compilations Completed 60/100 compilations Completed 70/100 compilations Completed 80/100 compilations Completed 90/100 compilations Completed 100/100 compilations Average: 115.27 ms (±27.32 ms) Min: 110.65 ms Max: 383.82 ms ``` ## Benchmarking script ```python #!/usr/bin/env python3 import argparse import os import sys import time from statistics import mean, stdev import torch from torch.cuda._utils import _nvrtc_compile def benchmark_compilation(use_pch, iterations=100): """Compile the same kernel many times with or without PCH.""" # CUB kernel that benefits from PCH kernel_source = """ #include #include #include extern "C" __global__ void test_kernel(const float* input, float* output, int n) { using BlockReduce = cub::BlockReduce; using BlockScan = cub::BlockScan; using WarpReduce = cub::WarpReduce; __shared__ union { typename BlockReduce::TempStorage reduce; typename BlockScan::TempStorage scan; typename WarpReduce::TempStorage warp[8]; } temp_storage; int idx = blockIdx.x * blockDim.x + threadIdx.x; float val = (idx < n) ? input[idx] : 0.0f; float sum = BlockReduce(temp_storage.reduce).Sum(val); __syncthreads(); float scan_result; BlockScan(temp_storage.scan).ExclusiveSum(val, scan_result); __syncthreads(); int warp_id = threadIdx.x / 32; float warp_sum = WarpReduce(temp_storage.warp[warp_id]).Sum(val); if (threadIdx.x == 0) { output[blockIdx.x] = sum + scan_result + warp_sum; } } """ device = torch.cuda.current_device() major, minor = torch.cuda.get_device_capability(device) compute_capability = f"{major}{minor}" compile_times = [] print( f"Compiling kernel {iterations} times {'WITH' if use_pch else 'WITHOUT'} PCH..." 
) for i in range(iterations): # Use unique kernel name to avoid caching between iterations kernel_name = f"test_kernel_{i}" unique_source = kernel_source.replace("test_kernel", kernel_name) start = time.perf_counter() ptx, mangled_name = _nvrtc_compile( unique_source, kernel_name, compute_capability, header_code="", nvcc_options=["-std=c++17"], auto_pch=use_pch, ) elapsed = time.perf_counter() - start compile_times.append(elapsed * 1000) # Convert to ms # Progress indicator if (i + 1) % 10 == 0: print(f" Completed {i + 1}/{iterations} compilations") return compile_times def main(): parser = argparse.ArgumentParser(description="Simple PCH Compilation Benchmark") parser.add_argument("--pch", action="store_true", help="Test with PCH only") parser.add_argument("--no-pch", action="store_true", help="Test without PCH only") parser.add_argument( "--iterations", type=int, default=100, help="Number of compilations" ) args = parser.parse_args() print("=" * 60) print("Simple PCH Compilation Benchmark") print("=" * 60) print(f"Device: {torch.cuda.get_device_name()}") print(f"Iterations: {args.iterations}") print() # Determine what to test test_both = not args.pch and not args.no_pch results = {} # Test without PCH if args.no_pch or test_both: print("Testing WITHOUT PCH:") print("-" * 30) times_no_pch = benchmark_compilation(use_pch=False, iterations=args.iterations) if times_no_pch: avg_no_pch = mean(times_no_pch) std_no_pch = stdev(times_no_pch) if len(times_no_pch) > 1 else 0 print(f"Average: {avg_no_pch:.2f} ms (±{std_no_pch:.2f} ms)") print(f"Min: {min(times_no_pch):.2f} ms") print(f"Max: {max(times_no_pch):.2f} ms") results["no_pch"] = avg_no_pch print() # Test with PCH if args.pch or test_both: print("Testing WITH PCH:") print("-" * 30) times_with_pch = benchmark_compilation( use_pch=True, iterations=args.iterations ) if times_with_pch: avg_with_pch = mean(times_with_pch) std_with_pch = stdev(times_with_pch) if len(times_with_pch) > 1 else 0 print(f"Average: {avg_with_pch:.2f} ms (±{std_with_pch:.2f} ms)") print(f"Min: {min(times_with_pch):.2f} ms") print(f"Max: {max(times_with_pch):.2f} ms") results["pch"] = avg_with_pch print() if __name__ == "__main__": main() ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162972 Approved by: https://github.com/albanD, https://github.com/janeyx99 --- torch/cuda/_utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/torch/cuda/_utils.py b/torch/cuda/_utils.py index c09ccb70ab290..a252f45b719c5 100644 --- a/torch/cuda/_utils.py +++ b/torch/cuda/_utils.py @@ -117,6 +117,7 @@ def _nvrtc_compile( header_code: str = "", cuda_include_dirs: Optional[list] = None, nvcc_options: Optional[list] = None, + auto_pch: bool = False, ) -> tuple[bytes, str]: """ Compiles a CUDA kernel using NVRTC and returns the PTX code. 
@@ -129,6 +130,7 @@ def _nvrtc_compile( header_code (str, optional): Additional header code to prepend to the kernel source cuda_include_dirs (list, None): List of directories containing CUDA headers nvcc_options (list, None): Additional options to pass to NVRTC + auto_pch (bool): Enable automatic precompiled headers (CUDA 12.8+) Returns: Tuple[bytes, str]: The compiled PTX code and mangled kernel name @@ -190,6 +192,13 @@ def check_nvrtc(result: int) -> None: for directory in cuda_include_dirs: options.append(f"-I{directory}".encode()) + # Enable automatic precompiled headers (CUDA 12.8+) + if auto_pch: + assert str(torch.version.cuda) >= "12.8", "PCH requires CUDA 12.8+" + if nvcc_options is None: + nvcc_options = [] + nvcc_options.append("--pch") + # Add custom NVCC options if nvcc_options: for option in nvcc_options: From 6b608dfe81efa50c45a0df02ca651f3cb212e506 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Thu, 11 Sep 2025 16:51:51 -0700 Subject: [PATCH 264/693] Add DISABLE_JUSTKNOBS to torch/_utils_internal.py and use it for dynamo _maybe_set_eval_frame (#162298) If JustKnobs is disabled (as it always is in OSS), we can easily avoid an extra layer of Python function call. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162298 Approved by: https://github.com/ezyang --- torch/_dynamo/eval_frame.py | 26 +++++++++++++++----------- torch/_utils_internal.py | 3 +++ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/torch/_dynamo/eval_frame.py b/torch/_dynamo/eval_frame.py index 22e16f168565f..4f5288e1cbe39 100644 --- a/torch/_dynamo/eval_frame.py +++ b/torch/_dynamo/eval_frame.py @@ -67,7 +67,7 @@ from torch._dynamo.types import ConvertFrameReturn, FrameAction, FrameExecStrategy from torch._export.utils import _compiling_state_context from torch._subclasses.fake_tensor import unset_fake_temporarily -from torch._utils_internal import justknobs_check, log_export_usage +from torch._utils_internal import DISABLE_JUSTKNOBS, justknobs_check, log_export_usage from torch.export.dynamic_shapes import ( _combine_args, _DimHint, @@ -145,16 +145,20 @@ class Unset(Enum): unset = Unset.token -def _maybe_set_eval_frame(callback: DynamoCallback) -> DynamoCallback: - # A wrapper on set_eval_frame that is guarded by a Justknob. - # Users can disable torchDynamo by setting the JK to False. - if not justknobs_check("pytorch/compiler:enable_compiler_set_eval_frame"): - torch._dynamo.utils.warn_once( - "Dynamo disabled by Justknob: enable_compiler_set_eval_frame, skipping set_eval_frame" - ) - return callback - else: - return set_eval_frame(callback) +if DISABLE_JUSTKNOBS: + _maybe_set_eval_frame = set_eval_frame +else: + + def _maybe_set_eval_frame(callback: DynamoCallback) -> DynamoCallback: + # A wrapper on set_eval_frame that is guarded by a Justknob. + # Users can disable torchDynamo by setting the JK to False. 
+ if not justknobs_check("pytorch/compiler:enable_compiler_set_eval_frame"): + torch._dynamo.utils.warn_once( + "Dynamo disabled by Justknob: enable_compiler_set_eval_frame, skipping set_eval_frame" + ) + return callback + else: + return set_eval_frame(callback) @dataclass diff --git a/torch/_utils_internal.py b/torch/_utils_internal.py index f20a88ce85402..839c50d12d565 100644 --- a/torch/_utils_internal.py +++ b/torch/_utils_internal.py @@ -176,6 +176,9 @@ def log_torch_jit_trace_exportability( return +DISABLE_JUSTKNOBS = True + + def justknobs_check(name: str, default: bool = True) -> bool: """ This function can be used to killswitch functionality in FB prod, From c77726b1d72b9238334c8ef17b6d140b73541775 Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Fri, 12 Sep 2025 16:10:25 +0000 Subject: [PATCH 265/693] [inductor] fix expand_shape when copy_shape is not a string (#162739) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162739 Approved by: https://github.com/eellison, https://github.com/mlazos --- torch/_inductor/codegen/triton.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py index 39bbbf668ba70..bcc7033a28485 100644 --- a/torch/_inductor/codegen/triton.py +++ b/torch/_inductor/codegen/triton.py @@ -2435,7 +2435,7 @@ def _get_expand_str(): if expand_shape is None: if need_dense or have_dense: - expand_shape = None if copy_shape else tuple(self.dense_size_list()) + _, expand_shape = _get_expand_str() else: expand_shape = () From 955e195c7d593c3994989940cae9bc5f5fa7c207 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Mon, 15 Sep 2025 23:23:01 +0000 Subject: [PATCH 266/693] [Triton] [Inductor] Add a Blackwell specific Template for persistent matmul (#162916) Summary: This adds the Triton Tutorial Matmul persistent matmul with device side TMA for Blackwell and adds it as a template option for blackwell. This uses newer Triton features such as automatic warp specialization and loop flattening, which while still containing flaws can improve performance on blackwell. This does not include the Epilogue subtiling section, as that will be a followup PR. This PR doesn't include any tuning. I am doing a larger benchmarking run to determine the best initial configs for tuning and will open a followup PR with better defaults soon. Test Plan: Tested on a Blackwell machine with test_max_autotune.py and confirmed the new tests pass. 
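For reference, the new template can be forced during autotuning the same way the added tests do (a sketch; shapes are arbitrary and the regex assumes the template name introduced here):

```python
import torch
from torch._inductor import config

a = torch.randn(512, 512, device="cuda", dtype=torch.float16)
b = torch.randn(512, 512, device="cuda", dtype=torch.float16)

with config.patch(
    {
        "max_autotune": True,
        "triton.enable_persistent_tma_matmul": True,
        # Restrict autotune choices to the new Blackwell template.
        "test_configs.autotune_choice_name_regex": "blackwell_ws_persistent_device_tma",
    }
):
    c = torch.compile(torch.mm)(a, b)
```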
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162916 Approved by: https://github.com/NikhilAPatel --- test/inductor/test_max_autotune.py | 142 +++++++++++++++++- torch/_inductor/codegen/cuda/cuda_env.py | 10 ++ torch/_inductor/kernel/mm.py | 104 +++++++++++++ torch/_inductor/template_heuristics/triton.py | 62 ++++++++ torch/_inductor/utils.py | 16 ++ torch/utils/_triton.py | 15 ++ 6 files changed, 348 insertions(+), 1 deletion(-) diff --git a/test/inductor/test_max_autotune.py b/test/inductor/test_max_autotune.py index e34b2c7edd009..0922edc70bd88 100644 --- a/test/inductor/test_max_autotune.py +++ b/test/inductor/test_max_autotune.py @@ -52,7 +52,11 @@ TEST_WITH_ROCM, ) from torch.testing._internal.logging_utils import multiple_logs_to_string -from torch.utils._triton import has_triton_stable_tma_api, has_triton_tma_device +from torch.utils._triton import ( + has_datacenter_blackwell_tma_device, + has_triton_stable_tma_api, + has_triton_tma_device, +) aten = torch.ops.aten @@ -259,6 +263,69 @@ def next_multiple_16(a: int) -> int: check_str = "triton.language.make_tensor_descriptor" FileCheck().check("triton_tem_fused_mm").check(check_str).run(code[0]) + @unittest.skipIf( + not has_datacenter_blackwell_tma_device(), + "Need Blackwell with device-side TMA support in Triton", + ) + @parametrize("a_transposed", (False, True)) + @parametrize("b_transposed", (False, True)) + @parametrize("dynamic", (False, True)) + @parametrize("tma_store", (False, True)) + def test_blackwell_max_autotune_regular_mm_persistent_tma( + self, + a_transposed: bool, + b_transposed: bool, + dynamic: bool, + tma_store: bool, + ): + def mm(a, b): + # TMA requires 16-byte alignment: here we repeat the dims + # by the factor of 8, as float16 is 2-byte. All dims are + # repeated due to the possible transpositions below. + a = a.repeat(8, 8) + b = b.repeat(8, 8) + if a_transposed: + a = a.T + if b_transposed: + b = b.T + + return torch.mm(a, b) + + M, N, K = 32, 16, 48 + a = ( + torch.randn(*((K, M) if a_transposed else (M, K))) + .to(torch.float16) + .to(GPU_TYPE) + ) + b = ( + torch.randn(*((N, K) if b_transposed else (K, N))) + .to(torch.float16) + .to(GPU_TYPE) + ) + + with config.patch( + { + "max_autotune": True, + "triton.enable_persistent_tma_matmul": True, + "triton.enable_template_tma_store": tma_store, + "test_configs.autotune_choice_name_regex": "blackwell_ws_persistent_device_tma", + } + ): + c_actual, code = run_and_get_code(torch.compile(mm, dynamic=dynamic), a, b) + c_expected = mm(a, b) + + torch.testing.assert_close(c_actual, c_expected, atol=1e-2, rtol=1e-2) + if tma_store: + # Verify that we are using a TMA implementation + # Note: The tma_descriptor0 is generated by the kernel. If the + # code generation process changes this could change. 
+ write_api = "tma_descriptor0.store" + else: + write_api = "tl.store" + FileCheck().check("triton_tem_fused_mm").check( + "triton.language.make_tensor_descriptor" + ).check("tl.load_tensor_descriptor").check(write_api).run(code[0]) + @unittest.skipIf( not has_triton_tma_device(), "Need device-side TMA support in Triton" ) @@ -451,6 +518,79 @@ def addmm(x, a, b): torch.testing.assert_close(c_actual, c_expected, atol=1e-2, rtol=1e-2) + @unittest.skipIf( + not has_datacenter_blackwell_tma_device(), + "Need Blackwell with device-side TMA support in Triton", + ) + @parametrize("a_transposed", (False, True)) + @parametrize("b_transposed", (False, True)) + @parametrize("dynamic", (False, True)) + @parametrize("tma_store", (False, True)) + def test_blackwell_max_autotune_addmm_persistent_tma( + self, + a_transposed: bool, + b_transposed: bool, + dynamic: bool, + tma_store: bool, + ): + def addmm(x, a, b): + # TMA requires 16-byte alignment: here we repeat the dims + # by the factor of 8, as float16 is 2-byte. All dims are + # repeated due to the possible transpositions below. + x = x.repeat(8) + a = a.repeat(8, 8) + b = b.repeat(8, 8) + + if a_transposed: + a = a.T + if b_transposed: + b = b.T + + return torch.addmm(x, a, b) + + M, N, K = 21, 31, 11 + a = ( + torch.randn(*((K, M) if a_transposed else (M, K))) + .to(torch.float16) + .to(GPU_TYPE) + ) + b = ( + torch.randn(*((N, K) if b_transposed else (K, N))) + .to(torch.float16) + .to(GPU_TYPE) + ) + x = torch.randn(N).to(torch.float16).to(GPU_TYPE) + + with config.patch( + { + "max_autotune": True, + "triton.enable_persistent_tma_matmul": True, + "triton.enable_template_tma_store": tma_store, + "test_configs.autotune_choice_name_regex": "blackwell_ws_persistent_device_tma", + } + ): + c_actual, code = run_and_get_code( + torch.compile(addmm, dynamic=dynamic), x, a, b + ) + c_expected = addmm(x, a, b) + + make_desc_api = "triton.language.make_tensor_descriptor" + read_api = "tl.load_tensor_descriptor" + if tma_store: + # Verify that we are using a TMA implementation + # Note: The tma_descriptor0 is generated by the kernel. If the + # code generation process changes this could change. 
+ write_api = "tma_descriptor0.store" + else: + write_api = "tl.store" + + # Verify that we are using a TMA implementation + FileCheck().check("triton_tem_fused_addmm").check(make_desc_api).check( + read_api + ).check(write_api).run(code[0]) + + torch.testing.assert_close(c_actual, c_expected, atol=1e-2, rtol=1e-2) + @unittest.skipIf( not has_triton_tma_device(), "Need device-side TMA support in Triton" ) diff --git a/torch/_inductor/codegen/cuda/cuda_env.py b/torch/_inductor/codegen/cuda/cuda_env.py index a11462fc8a0b8..3eb65273285ea 100644 --- a/torch/_inductor/codegen/cuda/cuda_env.py +++ b/torch/_inductor/codegen/cuda/cuda_env.py @@ -27,6 +27,16 @@ def get_cuda_arch() -> Optional[str]: return None +@clear_on_fresh_cache +@functools.lru_cache(1) +def is_datacenter_blackwell_arch() -> bool: + arch = get_cuda_arch() + if arch is None: + return False + arch_number = int(arch) + return arch_number >= 100 and arch_number < 110 + + @clear_on_fresh_cache @functools.lru_cache(1) def get_cuda_version() -> Optional[str]: diff --git a/torch/_inductor/kernel/mm.py b/torch/_inductor/kernel/mm.py index 07474ed450dd3..a101bcb58643f 100644 --- a/torch/_inductor/kernel/mm.py +++ b/torch/_inductor/kernel/mm.py @@ -41,6 +41,7 @@ use_cpp_gemm_template, use_cutlass_template, use_decompose_k_choice, + use_triton_blackwell_tma_template, use_triton_template, use_triton_tma_template, ) @@ -563,6 +564,103 @@ def apply_scaling( source=device_tma + load_scales + apply_scaling, ) +_compute_blackwell_pid = r""" +@triton.jit +def _compute_pid(tile_id, num_pid_in_group, grid_m, GROUP_M: tl.constexpr, NUM_SMS: tl.constexpr): + group_id = tile_id // num_pid_in_group + first_pid_m = group_id * GROUP_M + GROUP_M = min(grid_m - first_pid_m, GROUP_M) + pid_m = first_pid_m + (tile_id % GROUP_M) + pid_n = (tile_id % num_pid_in_group) // GROUP_M + return pid_m, pid_n +""" + +_blackwell_ws_persistent_device_tma = r""" +{{def_kernel("A", "B")}} + M = {{size("A", 0)}} + N = {{size("B", 1)}} + K = {{size("A", 1)}} + if M * N == 0: + # early exit due to zero-size input(s) + return + start_pid = tl.program_id(0) + grid_m = tl.cdiv(M, BLOCK_M) + grid_n = tl.cdiv(N, BLOCK_N) + k_tiles = tl.cdiv(K, BLOCK_K) + num_tiles = grid_m * grid_n + + # Note: We require TMA_EXPERIMENTAL_API == False, which + # we will check before invoking this template. 
+ stride_am = {{stride("A", 0)}} + stride_ak = {{stride("A", 1)}} + stride_bk = {{stride("B", 0)}} + stride_bn = {{stride("B", 1)}} + a_desc = triton.language.make_tensor_descriptor( + base=A, + shape=[M, K] if A_ROW_MAJOR else [K, M], + strides=[stride_am, 1] if A_ROW_MAJOR else [stride_ak, 1], + block_shape=[BLOCK_M, BLOCK_K] if A_ROW_MAJOR else [BLOCK_K, BLOCK_M], + ) + b_desc = triton.language.make_tensor_descriptor( + base=B, + shape=[K, N] if B_ROW_MAJOR else [N, K], + strides=[stride_bk, 1] if B_ROW_MAJOR else [stride_bn, 1], + block_shape=[BLOCK_K, BLOCK_N] if B_ROW_MAJOR else [BLOCK_N, BLOCK_K], + ) + + # tile_id_c is used in the epilogue to break the dependency between + # the prologue and the epilogue + tile_id_c = start_pid - NUM_SMS + num_pid_in_group = GROUP_M * grid_n + + for tile_id in tl.range( + start_pid, num_tiles, NUM_SMS, flatten=FLATTEN, warp_specialize=WARP_SPECIALIZE + ): + pid_m, pid_n = _compute_pid( + tile_id, num_pid_in_group, grid_m, GROUP_M, NUM_SMS + ) + offs_am = pid_m * BLOCK_M + offs_bn = pid_n * BLOCK_N + + accumulator = tl.zeros((BLOCK_M, BLOCK_N), dtype=tl.float32) + for ki in range(k_tiles): + offs_k = ki * BLOCK_K + a = tl.load_tensor_descriptor( + a_desc, + [offs_am, offs_k] if A_ROW_MAJOR else [offs_k, offs_am], + ) + b = tl.load_tensor_descriptor( + b_desc, + [offs_k, offs_bn] if B_ROW_MAJOR else [offs_bn, offs_k], + ) + accumulator += tl.dot( + a if A_ROW_MAJOR else a.T, + b if B_ROW_MAJOR else b.T, + allow_tf32=ALLOW_TF32, + ) + + tile_id_c += NUM_SMS + pid_m, pid_n = _compute_pid( + tile_id_c, num_pid_in_group, grid_m, GROUP_M, NUM_SMS + ) + offs_cm = pid_m * BLOCK_M + offs_cn = pid_n * BLOCK_N + # TODO: Add EPILOGUE_SUBTILE + {{store_output( + ("offs_cm", "offs_cn"), + "accumulator", + indent_width=8, + val_shape=("BLOCK_M", "BLOCK_N"), + block_indexing=True + )}} +""" + +blackwell_ws_persistent_device_tma_mm_template = TritonTemplate( + name="blackwell_ws_persistent_device_tma", + grid=persistent_mm_grid, + source=_blackwell_ws_persistent_device_tma + _compute_blackwell_pid, +) + # prevent duplication registration of extern functions @functools.cache @@ -777,6 +875,9 @@ def tuned_mm(mat1, mat2, *, layout=None): if use_triton_tma_template(mat1, mat2, output_layout=layout): templates_to_use.append(persistent_tma_mm_template) + if use_triton_blackwell_tma_template(mat1, mat2, output_layout=layout): + templates_to_use.append(blackwell_ws_persistent_device_tma_mm_template) + if use_decompose_k_choice(m, n, k): templates_to_use.append(decompose_k_subgraph_template) @@ -980,6 +1081,9 @@ def tuned_addmm(inp, mat1, mat2, *, alpha=1, beta=1, layout=None): if use_triton_tma_template(mat1, mat2, output_layout=layout): templates_to_use.append(persistent_tma_mm_template) + if use_triton_blackwell_tma_template(mat1, mat2, output_layout=layout): + templates_to_use.append(blackwell_ws_persistent_device_tma_mm_template) + templates_to_use.append(addmm_contiguous_subgraph_template) # Single unified call for all templates diff --git a/torch/_inductor/template_heuristics/triton.py b/torch/_inductor/template_heuristics/triton.py index 731baa650e37e..47cf33e5d9f5e 100644 --- a/torch/_inductor/template_heuristics/triton.py +++ b/torch/_inductor/template_heuristics/triton.py @@ -18,6 +18,7 @@ from .. 
import config, config as inductor_config from ..kernel.bmm import bmm_template from ..kernel.mm import ( + blackwell_ws_persistent_device_tma_mm_template, mm_template, persistent_tma_mm_template, scaled_mm_device_tma_template, @@ -1652,6 +1653,35 @@ def _get_template_configs_impl( yield {**template_kwargs, **tma_opts} +# TMA mixins for Blackwell templates +class BlackwellTMATemplateConfigMixin(TMATemplateConfigMixin): + def _get_template_configs_impl( + self, + kernel_inputs: KernelInputs, + op_name: str, + ) -> Generator[dict[str, Any], None, None]: + """ + Generate TMA template configs by calling super and adding TMA-specific options. + """ + base_ops = { + "NUM_SMS": get_num_sms(), + # TODO: Consider making this tunable. + "FLATTEN": True, + } + # Get base template configs from superclass + for template_kwargs in super()._get_template_configs_impl( + kernel_inputs, + op_name, + ): + # Some Triton versions requires num_warps >= 4 for WS + # to avoid compilation issues. Triton disables WS if num_warps < 4 + # or num_stages < 2. Similar issues have been seen with num_stages=1 + ws = ( + template_kwargs["num_warps"] >= 4 and template_kwargs["num_stages"] >= 2 + ) + yield {**template_kwargs, **base_ops, "WARP_SPECIALIZE": ws} + + # Scaled MM-specific mixin for scaled MM templates class BaseScaledMMConfigMixin(MMTemplateConfigMixin): """ @@ -1889,6 +1919,22 @@ def __init__(self) -> None: self.mm_configs = self.persistent_mm_configs +@register_template_heuristic( + blackwell_ws_persistent_device_tma_mm_template.uid, + "cuda", + register=torch.version.hip is None, +) +class CUDABlackwellPersistentTMATemplateConfigHeuristic( + BlackwellTMATemplateConfigMixin, CUDAConfigHeuristic +): + """Blackwell Persistent TMA template""" + + def __init__(self) -> None: + super().__init__() + # TODO: Tune mm_configs for blackwell. + self.mm_configs = self.persistent_mm_configs + + @register_template_heuristic( persistent_tma_mm_template.uid, "cuda", @@ -1901,6 +1947,22 @@ class CUDAAddmmPersistentTMATemplateConfigHeuristic( """Addmm specific mixin for CUDA""" +@register_template_heuristic( + blackwell_ws_persistent_device_tma_mm_template.uid, + "cuda", + register=torch.version.hip is None, +) +class CUDABlackwellAddmmPersistentTMATemplateConfigHeuristic( + AddMMConfigMixin, CUDABlackwellPersistentTMATemplateConfigHeuristic +): + """Addmm extension for DataCenter Blackwell Templates""" + + def __init__(self) -> None: + super().__init__() + # TODO: Tune mm_configs for blackwell. + self.mm_configs = self.persistent_mm_configs + + @register_template_heuristic( mm_template.uid, "cuda", register=torch.version.hip is None, op_name="scaled_mm" ) diff --git a/torch/_inductor/utils.py b/torch/_inductor/utils.py index b54c9dfe4965a..185704f00a9a7 100644 --- a/torch/_inductor/utils.py +++ b/torch/_inductor/utils.py @@ -1790,6 +1790,22 @@ def use_triton_tma_template( ) +def use_triton_blackwell_tma_template( + *matrices: IRNode, output_layout: Layout, add_guards: bool = False +) -> bool: + if not use_triton_tma_template( + *matrices, output_layout=output_layout, add_guards=add_guards + ): + return False + + from torch.utils._triton import has_triton_tensor_descriptor_host_tma + + from .codegen.cuda.cuda_env import is_datacenter_blackwell_arch + + # Blackwell template require the tensor descriptor API, not the experimental API. 
+ return has_triton_tensor_descriptor_host_tma() and is_datacenter_blackwell_arch() + + def use_cutlass_template(layout: Layout, m: int, n: int, k: int) -> bool: from .virtualized import V diff --git a/torch/utils/_triton.py b/torch/utils/_triton.py index 7d545e8221643..9901fe58d1f21 100644 --- a/torch/utils/_triton.py +++ b/torch/utils/_triton.py @@ -105,6 +105,21 @@ def has_triton_tma_device() -> bool: return False +@functools.cache +def has_datacenter_blackwell_tma_device() -> bool: + import torch + + if ( + torch.cuda.is_available() + and torch.cuda.get_device_capability() >= (10, 0) + and torch.cuda.get_device_capability() < (11, 0) + and not torch.version.hip + ): + return has_triton_tma_device() and has_triton_tensor_descriptor_host_tma() + + return False + + @functools.lru_cache(None) def has_triton_stable_tma_api() -> bool: if has_triton_package(): From cfc539fe15375f83e2fbc5df8066243dfac0c272 Mon Sep 17 00:00:00 2001 From: Kushagra Rastogi Date: Mon, 15 Sep 2025 23:33:10 +0000 Subject: [PATCH 267/693] Improved error lr last epoch (#162368) Fixes #160626 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162368 Approved by: https://github.com/janeyx99 --- test/optim/test_lrscheduler.py | 10 ++++++++++ torch/optim/lr_scheduler.py | 6 ++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/test/optim/test_lrscheduler.py b/test/optim/test_lrscheduler.py index c36e7b2e21d62..2f0fe83329cc9 100644 --- a/test/optim/test_lrscheduler.py +++ b/test/optim/test_lrscheduler.py @@ -369,6 +369,16 @@ def test_get_last_lr_multi_step_lr(self): scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9]) self._test_get_last_lr(scheduler, targets, epochs) + def test_raise_error_when_last_epoch_is_greater_than_0_and_initial_lr_is_not_specified( + self, + ): + optimizer = SGD([Parameter(torch.randn(2, 2, requires_grad=True))], 0.1) + with self.assertRaisesRegex( + KeyError, + r"param \'initial_lr\' is not specified in param_groups\[0\] when resuming scheduler with last_epoch >= 0", + ): + StepLR(optimizer, step_size=3, gamma=0.1, last_epoch=1) + def test_multi_step_lr(self): # lr = 0.05 if epoch < 2 # lr = 0.005 if 2 <= epoch < 5 diff --git a/torch/optim/lr_scheduler.py b/torch/optim/lr_scheduler.py index 8703719dabc72..d7d00cf261c5b 100644 --- a/torch/optim/lr_scheduler.py +++ b/torch/optim/lr_scheduler.py @@ -106,8 +106,10 @@ def __init__( for i, group in enumerate(optimizer.param_groups): if "initial_lr" not in group: raise KeyError( - "param 'initial_lr' is not specified " - f"in param_groups[{i}] when resuming an optimizer" + f"param 'initial_lr' is not specified in param_groups[{i}] when resuming scheduler with last_epoch >= 0.\n" + "This typically happens when:\n" + "1. You're trying to resume training from a checkpoint but haven't properly loaded the optimizer state\n" + "2. 
You're using last_epoch >= 0 for a fresh training run (not recommended)" ) self.base_lrs: list[float] = [ group["initial_lr"] for group in optimizer.param_groups From dac6a4bf6c4505f0ed7099284bd6192d600db424 Mon Sep 17 00:00:00 2001 From: Chien-Chin Huang Date: Mon, 15 Sep 2025 14:30:56 -0700 Subject: [PATCH 268/693] [CP] Fix the CP FlexAttention test (#162518) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162518 Approved by: https://github.com/XilunWu, https://github.com/drisspg --- test/distributed/tensor/test_attention.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/distributed/tensor/test_attention.py b/test/distributed/tensor/test_attention.py index a2543d443e4fe..0b48118c24609 100644 --- a/test/distributed/tensor/test_attention.py +++ b/test/distributed/tensor/test_attention.py @@ -28,6 +28,7 @@ from torch.nn.attention import sdpa_kernel, SDPBackend from torch.nn.attention.flex_attention import ( _mask_mod_signature, + AuxRequest, create_block_mask, flex_attention, ) @@ -574,8 +575,8 @@ def _test_ring_flex_attention( device=self.device_type, ) - expect_out, expect_lse = compiled_flex_attention( - q, k, v, block_mask=block_mask, return_lse=True + expect_out, expect_aux = compiled_flex_attention( + q, k, v, block_mask=block_mask, return_aux=AuxRequest(lse=True) ) expect_out.sum().backward() @@ -635,12 +636,12 @@ def _test_ring_flex_attention( cp_k.requires_grad = True cp_v.requires_grad = True - cp_out, cp_lse = compiled_flex_attention( + cp_out, cp_aux = compiled_flex_attention( cp_q, cp_k, cp_v, block_mask=cp_block_mask, - return_lse=True, + return_aux=AuxRequest(lse=True), ) # check block_mask rewrite doesn't escape to the outside @@ -657,9 +658,11 @@ def _test_ring_flex_attention( cp_v.requires_grad = False # unshard the output - cp_out, cp_lse = context_parallel_unshard(device_mesh, [cp_out, cp_lse], [2, 2]) + cp_out, cp_lse = context_parallel_unshard( + device_mesh, [cp_out, cp_aux.lse], [2, 2] + ) torch.testing.assert_close(cp_out, expect_out, atol=atol, rtol=rtol) - torch.testing.assert_close(cp_lse, expect_lse, atol=atol, rtol=rtol) + torch.testing.assert_close(cp_lse, expect_aux.lse, atol=atol, rtol=rtol) # unshard the gradient cp_q_grad, cp_k_grad, cp_v_grad = context_parallel_unshard( From d08cabe31475dbe307c49781bae6558ac8eafa52 Mon Sep 17 00:00:00 2001 From: drisspg Date: Mon, 15 Sep 2025 12:05:41 -0700 Subject: [PATCH 269/693] [BC Breaking] Remove flex + njt code paths (#161734) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161734 Approved by: https://github.com/jbschlosser --- docs/source/nn.attention.flex_attention.md | 3 - test/test_nestedtensor.py | 121 ------------ torch/_higher_order_ops/flex_attention.py | 7 - torch/nested/_internal/ops.py | 138 -------------- torch/nn/attention/flex_attention.py | 208 --------------------- 5 files changed, 477 deletions(-) diff --git a/docs/source/nn.attention.flex_attention.md b/docs/source/nn.attention.flex_attention.md index 4cfb51c5945c0..8c51cee276514 100644 --- a/docs/source/nn.attention.flex_attention.md +++ b/docs/source/nn.attention.flex_attention.md @@ -30,9 +30,6 @@ .. autofunction:: create_mask ``` ```{eval-rst} -.. autofunction:: create_nested_block_mask -``` -```{eval-rst} .. 
autofunction:: and_masks ``` ```{eval-rst} diff --git a/test/test_nestedtensor.py b/test/test_nestedtensor.py index ac97f2beda8e8..5affbb74cca09 100644 --- a/test/test_nestedtensor.py +++ b/test/test_nestedtensor.py @@ -26,7 +26,6 @@ NestedTensor, ViewNestedFromBuffer, ) -from torch.nn.attention.flex_attention import create_nested_block_mask, flex_attention from torch.testing._internal.common_cuda import ( PLATFORM_SUPPORTS_FUSED_ATTENTION, SM70OrLater, @@ -36,7 +35,6 @@ from torch.testing._internal.common_device_type import ( dtypes, dtypesIfCUDA, - flex_attention_supported_platform, instantiate_device_type_tests, onlyCPU, onlyCUDA, @@ -60,7 +58,6 @@ parametrize, run_tests, serialTest, - skipIfRocm, skipIfSlowGradcheckEnv, skipIfTorchDynamo, subtest, @@ -7285,124 +7282,6 @@ def _rand_nt(noncontig_with_holes=noncontig_with_holes): return query, key, value - @unittest.skip( - "Temporarily skip - nested tensor backward pass broken after return-max-scores commit" - ) - @onlyCUDA - @flex_attention_supported_platform - @dtypes(torch.float32) - # non-contiguous with holes not supported yet - @decorateIf(unittest.skip, lambda params: params["noncontig_with_holes"]) - @parametrize("noncontig_with_holes", [False, True]) - @parametrize("cross_attention", [False, True]) - @skipIfRocm - def test_flex_attention(self, device, dtype, noncontig_with_holes, cross_attention): - query, key, value = self._rand_qkv( - device, dtype, noncontig_with_holes, q_and_kv_match=(not cross_attention) - ) - - # Run FlexAttention with a causal mask - def causal_mask(b, h, q_idx, kv_idx): - return q_idx >= kv_idx - - if cross_attention: - block_mask = create_nested_block_mask( - causal_mask, 1, 1, query, key, _compile=True - ) - else: - block_mask = create_nested_block_mask( - causal_mask, 1, 1, query, _compile=True - ) - - out_flex = flex_attention(query, key, value, block_mask=block_mask) - grad_out = torch.randn_like(out_flex) - grads_flex = torch.autograd.grad( - out_flex, inputs=(query, key, value), grad_outputs=(grad_out,) - ) - flex_outs = [out_flex, *grads_flex] - - # Run FlexAttention with a score_mod that represents causal attention - def causal_score_mod(score, b, h, q_idx, kv_idx): - return torch.where(q_idx >= kv_idx, score, float("-inf")) - - out_flex2 = flex_attention(query, key, value, score_mod=causal_score_mod) - grads_flex2 = torch.autograd.grad( - out_flex2, inputs=(query, key, value), grad_outputs=(grad_out,) - ) - flex_outs2 = [out_flex2, *grads_flex2] - - # Run causal SDPA for comparison - out_sdpa = F.scaled_dot_product_attention(query, key, value, is_causal=True) - grads_sdpa = torch.autograd.grad( - out_sdpa, inputs=(query, key, value), grad_outputs=(grad_out,) - ) - sdpa_outs = [out_sdpa, *grads_sdpa] - - # Compare flex vs. SDPA output and grads - for flex, flex2, sdpa in zip(flex_outs, flex_outs2, sdpa_outs): - self.assertTrue(flex.is_nested and flex2.is_nested and sdpa.is_nested) - self.assertEqual(flex, sdpa, atol=1e-2, rtol=1e-2) - self.assertEqual(flex2, sdpa, atol=1e-2, rtol=1e-2) - - @onlyCUDA - @flex_attention_supported_platform - @dtypes(torch.float32) - def test_flex_attention_converts_stacked_seq_indices(self, device, dtype): - # This test verifies that a score_mod function written to operate within - # NJT sequence index space, such as a lookup table, works correctly. This - # validates that FlexAttention properly converts indices within the - # "stacked sequence" space used for NJT -> sequence-relative indices. 
- query, key, value = self._rand_qkv(device, dtype) - - # Test with score_mod - score_mod_table = torch.randn(query._max_seqlen, device=device, dtype=dtype) - - def my_score_mod(score, b, h, q_idx, kv_idx): - return score_mod_table[q_idx] - - flex_attention(query, key, value, score_mod=my_score_mod) - - # Test with batch-specific score_mod - batch_size = query.size(0) - batch_table = torch.randn(batch_size, device=device, dtype=dtype) - # Keep score the same for batch index == 0 - batch_table[0].zero_() - - def batch_specific_score_mod(score, b, h, q_idx, kv_idx): - return score + batch_table[b] - - def identity_score_mod(score, b, h, q_idx, kv_idx): - return score - - output = flex_attention(query, key, value, score_mod=batch_specific_score_mod) - output_identity = flex_attention( - query, key, value, score_mod=identity_score_mod - ) - - # Guard against a bug where the batch index passed to score_mod is always b == 0. - # Output would be equivalent to applying an identity score_mod. - # See https://github.com/pytorch/pytorch/issues/143788 - self.assertFalse(torch.allclose(output._values, output_identity._values)) - - # Test with mask_mod - mask_mod_table = score_mod_table > 0.0 - - def my_mask_mod(b, h, q_idx, kv_idx): - return mask_mod_table[q_idx] - - def my_mask_mod2(b, h, q_idx, kv_idx): - return mask_mod_table[q_idx] & (b == 0) - - block_mask = create_nested_block_mask(my_mask_mod, 1, 1, query, _compile=True) - output = flex_attention(query, key, value, block_mask=block_mask) - - block_mask2 = create_nested_block_mask(my_mask_mod2, 1, 1, query, _compile=True) - output2 = flex_attention(query, key, value, block_mask=block_mask2) - - # Guard against a bug where the batch index passed to mask_mod is always b == 0. - # See https://github.com/pytorch/pytorch/issues/143788 - self.assertFalse(torch.allclose(output._values, output2._values)) - @dtypes(torch.float32) def test_apply_(self, device, dtype): nt = random_nt_from_dims( diff --git a/torch/_higher_order_ops/flex_attention.py b/torch/_higher_order_ops/flex_attention.py index 2d352ae03a45c..b52bab0e32722 100644 --- a/torch/_higher_order_ops/flex_attention.py +++ b/torch/_higher_order_ops/flex_attention.py @@ -505,13 +505,6 @@ def flex_attention_fake_impl( ): return NotImplemented - # TODO: Figure out a better way to handle this for NJT than using sum() - if query.is_nested: - out = torch.empty_like(query, memory_format=torch.contiguous_format) - logsumexp = query.sum(dim=-1) - max_scores = query.max(dim=-1)[0] - return out, logsumexp, max_scores - v_head_dim = value.size(-1) batch_size, num_heads, seq_len_q, _q_head_dim = query.shape logsumexp = query.new_empty(batch_size, num_heads, seq_len_q, dtype=torch.float32) diff --git a/torch/nested/_internal/ops.py b/torch/nested/_internal/ops.py index 19b1fe670835f..8cec2634a30fc 100644 --- a/torch/nested/_internal/ops.py +++ b/torch/nested/_internal/ops.py @@ -2665,144 +2665,6 @@ def matmul_backward_default(func, *args, **kwargs): return (grad_self, grad_other) -from torch._higher_order_ops.flex_attention import ( - flex_attention as flex_attention_hop, - flex_attention_backward as flex_attention_backward_hop, -) -from torch.fx.graph_module import GraphModule - - -@flex_attention_hop.py_impl(NestedTensor) # type: ignore[misc] -def flex_njt( - query: torch.Tensor, - key: torch.Tensor, - value: torch.Tensor, - score_mod: Callable, - block_mask: Tuple, - scale: float, - kernel_options: Dict[str, Any], - score_mod_other_buffers: Tuple = (), - mask_mod_other_buffers: Tuple = (), -) -> 
Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: - assert query.dim() == 4 and key.dim() == 4 and value.dim() == 4 - - # TODO: Support this if needed; determine if NJT buffers need be unwrapped as dense. - if any( - isinstance(buf, torch.Tensor) and buf.is_nested - for buf in score_mod_other_buffers + mask_mod_other_buffers - ): - raise RuntimeError( - "flex_attention(): Nested tensor score_mod / mask_mod buffers are not " - "currently supported. Please file an issue if this is important to you." - ) - - # Always set them since 0 sized elements are not handled gracefully - kernel_options = {**kernel_options, "OUTPUT_MAX": True, "OUTPUT_LOGSUMEXP": True} - - # need to pass dense tensor of shape (B, n_heads, sum(seq_len), D) - output = flex_attention_hop( - query.values().unsqueeze(0), - key.values().unsqueeze(0), - value.values().unsqueeze(0), - score_mod=score_mod, - block_mask=block_mask, - scale=scale, - kernel_options=kernel_options, - score_mod_other_buffers=score_mod_other_buffers, - mask_mod_other_buffers=mask_mod_other_buffers, - ) - - # wrap outputs as NJT - output_njt = torch.nested.nested_tensor_from_jagged( - output[0].transpose(1, 2).squeeze(0), - query._offsets, # type: ignore[attr-defined] - query._lengths, # type: ignore[attr-defined] - min_seqlen=query._maybe_min_seqlen, # type: ignore[attr-defined] - max_seqlen=query._maybe_max_seqlen, # type: ignore[attr-defined] - ).transpose(1, 2) - - logsumexp_njt = torch.nested.nested_tensor_from_jagged( - output[1].transpose(1, 2).squeeze(0), - query._offsets, # type: ignore[attr-defined] - query._lengths, # type: ignore[attr-defined] - min_seqlen=query._maybe_min_seqlen, # type: ignore[attr-defined] - max_seqlen=query._maybe_max_seqlen, # type: ignore[attr-defined] - ).transpose(1, 2) - - max_scores_njt = torch.nested.nested_tensor_from_jagged( - output[2].transpose(1, 2).squeeze(0), - query._offsets, # type: ignore[attr-defined] - query._lengths, # type: ignore[attr-defined] - min_seqlen=query._maybe_min_seqlen, # type: ignore[attr-defined] - max_seqlen=query._maybe_max_seqlen, # type: ignore[attr-defined] - ).transpose(1, 2) - - return (output_njt, logsumexp_njt, max_scores_njt) - - -@flex_attention_backward_hop.py_impl(NestedTensor) # type: ignore[misc] -def flex_njt_backward( - query: torch.Tensor, - key: torch.Tensor, - value: torch.Tensor, - out: torch.Tensor, - logsumexp: torch.Tensor, - grad_out: torch.Tensor, - grad_logsumexp: torch.Tensor, - fw_graph: Union[Callable, GraphModule], - joint_graph: GraphModule, - block_mask: Tuple, - scale: float, - kernel_options: Dict[str, Any], - score_mod_other_buffers: Tuple = (), - mask_mod_other_buffers: Tuple = (), -) -> Tuple[ - torch.Tensor, torch.Tensor, torch.Tensor, Tuple[Optional[torch.Tensor], ...] 
-]: - output = flex_attention_backward_hop( - query.values().unsqueeze(0), - key.values().unsqueeze(0), - value.values().unsqueeze(0), - out=out.values().unsqueeze(0), - logsumexp=logsumexp.values().unsqueeze(0), - grad_out=grad_out.values().unsqueeze(0), - grad_logsumexp=grad_logsumexp.values().unsqueeze(0), - fw_graph=fw_graph, - joint_graph=joint_graph, - block_mask=block_mask, - scale=scale, - kernel_options=kernel_options, - score_mod_other_buffers=score_mod_other_buffers, - mask_mod_other_buffers=mask_mod_other_buffers, - ) - - # wrap grads as NJTs - dense_q_grad, dense_k_grad, dense_v_grad, score_mod_other_buffer_grads = output - njt_q_grad = torch.nested.nested_tensor_from_jagged( - dense_q_grad.transpose(1, 2).squeeze(0), - query._offsets, # type: ignore[attr-defined] - query._lengths, # type: ignore[attr-defined] - min_seqlen=query._maybe_min_seqlen, # type: ignore[attr-defined] - max_seqlen=query._maybe_max_seqlen, # type: ignore[attr-defined] - ).transpose(1, 2) - njt_k_grad = torch.nested.nested_tensor_from_jagged( - dense_k_grad.transpose(1, 2).squeeze(0), - key._offsets, # type: ignore[attr-defined] - key._lengths, # type: ignore[attr-defined] - min_seqlen=key._maybe_min_seqlen, # type: ignore[attr-defined] - max_seqlen=key._maybe_max_seqlen, # type: ignore[attr-defined] - ).transpose(1, 2) - njt_v_grad = torch.nested.nested_tensor_from_jagged( - dense_v_grad.transpose(1, 2).squeeze(0), - value._offsets, # type: ignore[attr-defined] - value._lengths, # type: ignore[attr-defined] - min_seqlen=value._maybe_min_seqlen, # type: ignore[attr-defined] - max_seqlen=value._maybe_max_seqlen, # type: ignore[attr-defined] - ).transpose(1, 2) - - return (njt_q_grad, njt_k_grad, njt_v_grad, score_mod_other_buffer_grads) - - # Make the dummy available on the C++ side. @register_jagged_func(torch.ops.aten._nested_get_jagged_dummy.default, "self: any") def _nested_get_jagged_dummy(func, *args, **kwargs): diff --git a/torch/nn/attention/flex_attention.py b/torch/nn/attention/flex_attention.py index ccd5697aa49c5..a6d6e1228a324 100644 --- a/torch/nn/attention/flex_attention.py +++ b/torch/nn/attention/flex_attention.py @@ -74,7 +74,6 @@ def _warn_once( "FlexKernelOptions", "create_block_mask", "create_mask", - "create_nested_block_mask", "or_masks", "and_masks", "noop_mask", @@ -1111,179 +1110,6 @@ def _create_empty_block_mask(query: Tensor, key: Tensor) -> BlockMask: ) -def _nested_mod_func_adapter( - orig_mod_func: Union[_score_mod_signature, _mask_mod_signature], - q_nt: torch.Tensor, - kv_nt: torch.Tensor, - is_score_mod: bool, -) -> Union[_score_mod_signature, _mask_mod_signature]: - r"""Adapter to convert a score_mod / mask_mod to be NJT-compatible. The given mod func - should be written as if operating over a single sequence at a item. This adapter will - handle conversion from indices operating over a "stacked sequence" of length ``sum(S)`` - for sequence length ``S`` in the NJT to "sequence relative" indices in range ``[0, S)``. - - Args: - orig_mod_func (Callable): Function to modify attention scores. It takes four or five - arguments, depending on whether a mask_mod or score_mod func is passed. - q_nt (torch.Tensor): Jagged layout nested tensor (NJT) that defines the sequence length - structure for query. - kv_nt (torch.Tensor): Jagged layout nested tensor (NJT) that defines the sequence length - structure for key / value. - is_score_mod (bool): Indicates whether the mod function is a score_mod. 
- - Returns: - nt_score_mod: An NJT-compatible version of orig_score_mod - """ - - # Used to convert indices within the "stacked" sequence (range [0, sum(*))) - # to "sequence local" indices (range [0, S) for each S). - def _build_seq_idx(offsets, total_length): - range_tensor = torch.arange( - total_length, device=offsets.device, dtype=torch.int32 - ) - - # Use searchsorted to find the index for each position - # NB: This assumes offsets[0] to offsets[-1] spans the packed dim of values. - # If we ever loosen this restriction, this logic will need to be updated. - seq_idx = torch.searchsorted(offsets, range_tensor, right=True) - 1 - return seq_idx - - q_offsets = q_nt._offsets # type: ignore[attr-defined] - kv_offsets = kv_nt._offsets # type: ignore[attr-defined] - q_seq_idx = _build_seq_idx(q_offsets, q_nt._values.shape[q_nt._ragged_idx - 1]) # type: ignore[attr-defined] - if q_nt is kv_nt: - kv_seq_idx = q_seq_idx - else: - # cross attention case - kv_seq_idx = _build_seq_idx( - kv_offsets, - kv_nt._values.shape[kv_nt._ragged_idx - 1], # type: ignore[attr-defined] - ) - - # Converts q_idx / kv_idx from [0, total_length) -> [0, S), where S refers - # to the sequence length for each sequence in the NJT, for use in given - # score_mod. This allows the user to write a score_mod as if it were - # operating on a single sequence and the "stacked sequence" is split - # automatically into individual sequences for them. - if is_score_mod: - - def nt_score_mod(score, b, h, q_idx, kv_idx): - b_nested = q_seq_idx[q_idx] - q_nested = q_idx - q_offsets[q_seq_idx[q_idx]] - kv_nested = kv_idx - kv_offsets[kv_seq_idx[kv_idx]] - is_same_sequence = q_seq_idx[q_idx] == kv_seq_idx[kv_idx] - return torch.where( - is_same_sequence, - orig_mod_func(score, b_nested, h, q_nested, kv_nested), # type: ignore[call-arg] - # don't allow inter-sequence attention - float("-inf"), - ) - - return nt_score_mod - else: - - def nt_mask_mod(b, h, q_idx, kv_idx): - b_nested = q_seq_idx[q_idx] - q_nested = q_idx - q_offsets[q_seq_idx[q_idx]] - kv_nested = kv_idx - kv_offsets[kv_seq_idx[kv_idx]] - # don't allow inter-sequence attention - is_same_sequence = q_seq_idx[q_idx] == kv_seq_idx[kv_idx] - return orig_mod_func(b_nested, h, q_nested, kv_nested) & is_same_sequence # type: ignore[call-arg] - - return nt_mask_mod - - -def create_nested_block_mask( - mask_mod: _mask_mod_signature, - B: Optional[int], - H: Optional[int], - q_nt: torch.Tensor, - kv_nt: Optional[torch.Tensor] = None, - BLOCK_SIZE: Union[int, tuple[int, int]] = _DEFAULT_SPARSE_BLOCK_SIZE, - _compile=False, -) -> BlockMask: - r"""This function creates a nested tensor compatible block mask tuple from a mask_mod - function. The returned BlockMask will be on the device specified by the input nested tensor. - - Args: - mask_mod (Callable): mask_mod function. This is a callable that defines the - masking pattern for the attention mechanism. It takes four arguments: - b (batch size), h (number of heads), q_idx (query index), and kv_idx (key/value index). - It should return a boolean tensor indicating which attention connections are allowed - (True) or masked out (False). - B (int): Batch size. - H (int): Number of query heads. - q_nt (torch.Tensor): Jagged layout nested tensor (NJT) that defines the sequence length - structure for query. The block mask will be constructed to operate on a "stacked - sequence" of length ``sum(S)`` for sequence length ``S`` from the NJT. 
- kv_nt (torch.Tensor): Jagged layout nested tensor (NJT) that defines the sequence length - structure for key / value, allowing for cross attention. The block mask will be - constructed to operate on a "stacked sequence" of length ``sum(S)`` for sequence - length ``S`` from the NJT. If this is None, ``q_nt`` is used to define the structure - for key / value as well. Default: None - BLOCK_SIZE (int or tuple[int, int]): Block size for the block mask. If a single int is - provided it is used for both query and key/value. - - Returns: - BlockMask: A BlockMask object that contains the block mask information. - - Example Usage: - .. code-block:: python - - # shape (B, num_heads, seq_len*, D) where seq_len* varies across the batch - query = torch.nested.nested_tensor(..., layout=torch.jagged) - key = torch.nested.nested_tensor(..., layout=torch.jagged) - value = torch.nested.nested_tensor(..., layout=torch.jagged) - - - def causal_mask(b, h, q_idx, kv_idx): - return q_idx >= kv_idx - - - block_mask = create_nested_block_mask( - causal_mask, 1, 1, query, _compile=True - ) - output = flex_attention(query, key, value, block_mask=block_mask) - - .. code-block:: python - - # shape (B, num_heads, seq_len*, D) where seq_len* varies across the batch - query = torch.nested.nested_tensor(..., layout=torch.jagged) - key = torch.nested.nested_tensor(..., layout=torch.jagged) - value = torch.nested.nested_tensor(..., layout=torch.jagged) - - - def causal_mask(b, h, q_idx, kv_idx): - return q_idx >= kv_idx - - - # cross attention case: pass both query and key/value NJTs - block_mask = create_nested_block_mask( - causal_mask, 1, 1, query, key, _compile=True - ) - output = flex_attention(query, key, value, block_mask=block_mask) - """ - # use same structure for kv as for q by default - if kv_nt is None: - kv_nt = q_nt - if q_nt.device != kv_nt.device: - raise ValueError( - "create_nested_block_mask(): Expected q_nt and kv_nt to be on the same device" - ) - return create_block_mask( - _nested_mod_func_adapter(mask_mod, q_nt, kv_nt, is_score_mod=False), # type: ignore[arg-type] - B, - H, - q_nt._values.shape[q_nt._ragged_idx - 1], # type: ignore[attr-defined] - kv_nt._values.shape[kv_nt._ragged_idx - 1], # type: ignore[attr-defined] - device=q_nt.device, # type: ignore[arg-type] - # compile is important so we don't materialize a mask_tensor of - # shape (1, 1, total_seqlen, total_seqlen) - BLOCK_SIZE=BLOCK_SIZE, - _compile=_compile, - ) - - def _apply_kernel_options( query: Tensor, key: Tensor, @@ -1359,25 +1185,6 @@ def _validate_device(query: Tensor, key: Tensor, value: Tensor): ) -def _validate_nestedness(query: Tensor, key: Tensor, value: Tensor): - # Currently, inputs can only be all nested or no nested. - if query.is_nested != key.is_nested or key.is_nested != value.is_nested: - raise ValueError( - "FlexAttention does not support mixed nested tensor / non-nested tensor inputs. " - "Please file an issue requesting this if it is important to you." - ) - - if ( - (query.is_nested and query._lengths is not None) # type: ignore[attr-defined] - or (key.is_nested and key._lengths is not None) # type: ignore[attr-defined] - or (value.is_nested and value._lengths is not None) # type: ignore[attr-defined] - ): - raise ValueError( - "FlexAttention does not support nested tensors that are non-contiguous with holes. " - "Please file an issue requesting this if it is important to you." 
- ) - - def _enforce_mem_layouts( query: Tensor, key: Tensor, value: Tensor ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: @@ -1517,7 +1324,6 @@ def score_mod( _validate_sdpa_input(query, key, value) _validate_embed_dim(query, key, value) _validate_device(query, key, value) - _validate_nestedness(query, key, value) query, key, value = _enforce_mem_layouts(query, key, value) if query.dim() != 4 or key.dim() != 4 or value.dim() != 4: raise NotImplementedError("NYI: query, key, and value must be 4D tensors") @@ -1552,14 +1358,6 @@ def score_mod( if score_mod is None: score_mod = _identity - elif query.is_nested: - # use same NJT if the ragged structures for sequence lengths match between q and kv - kv = ( - query - if query.size(query._ragged_idx) == key.size(query._ragged_idx) # type: ignore[attr-defined] - else key - ) - score_mod = _nested_mod_func_adapter(score_mod, query, kv, is_score_mod=True) # type: ignore[assignment] if block_mask is None: block_mask = _create_empty_block_mask(query, key) @@ -1570,12 +1368,6 @@ def score_mod( ): # This corresponds to the case where we essentially have a "no-op" block mask. pass - elif query.is_nested: - if block_mask.shape[-2] != query._values.size(query._ragged_idx - 1): # type: ignore[attr-defined] - raise RuntimeError( - f"block_mask of shape {block_mask.shape} is not compatible with nested tensor input " - f"with total sequence length of {query._values.size(query._ragged_idx - 1)}" # type: ignore[attr-defined] - ) else: block_mask_q_len = block_mask.shape[-2] block_mask_kv_len = block_mask.shape[-1] From e900a274e5c7fd7f0c76a991ad182c635f807c83 Mon Sep 17 00:00:00 2001 From: Michael Kelly Date: Tue, 16 Sep 2025 00:23:44 +0000 Subject: [PATCH 270/693] Add `CUDA_KERNEL_ASSERT_PRINTF`, a more flexible `CUDA_KERNEL_ASSERT_MSG` (#160129) This new assertion helper bundles a printf call with the assertion. The goal is to make changes to instrument asserts with device-side information more intuitive and less error-prone. (See the printf call in ATen/native/cuda/Repeat.cu.) Parametrized error messages are a substantial improvement in debuggability because they show the mismatched device-side values. This lets us avoid a whole cycle of rebuilding + re-running failing training workflows. We include file, line number, function, and failing condition in the printf (along with the message provided by the user). The format matches the format of the message output by `__assert_fail`. There's also an easy-to-grep-for keyword `CUDA_KERNEL_ASSERT` in the message. I'm following the existing patterns of arch-specific macros - e.g., on ROCm, this is just a call to abort(), just like the other `CUDA_KERNEL_ASSERT*` variations. I'd appreciate any thoughts on architecture-specific testing (most likely on the OSS side). # Alternatives * We could just update `CUDA_KERNEL_ASSERT_MSG`. That would mean introducing `printf` calls from the kernel where there weren't any before, though. This seems like a bad idea because of the performance sensitivity. * If we want to move more slowly here, I could instrument more `CUDA_KERNEL_ASSERT` callsites without a macro, similar to https://github.com/pytorch/pytorch/pull/157996. But the main downside here is the performance hit, so let's have an organized way of doing it first. # Risks/Problems * We're shoving a lot of stuff into this printf. If a filename (at compile-time) contains `%s`, we will end up dereferencing whatever value was pushed in. On a CPU this can cause a segfault. I don't know how it behaves on a GPU. 
* Adding printf calls can have a performance impact because of increased register and stack usage. I did not see this play out in practice (see "benchmarks" below). However, there are changes to the generated PTX that could result in performance problems later (see "changes in generated PTX" below). # Benchmarks * I ran the following benchmarks a several times on a host with an A100: https://gist.github.com/mjkatmeta/e5494d949204a2afe2d43c452b99424f * Results are here -- I couldn't find a significant difference before or after https://gist.github.com/mjkatmeta/0f99ec27bb91214fb2cc7f612938d431 # Change in generated PTX This is the easiest way I found to run nvcc over just Repeat.cu (this is a buck2 target that includes just a copy of Repeat.cu): ``` buck2 build --show-output scripts/mjk/ai_training/cuda_benchmarks:repeat_cuda # then use the printed .so file like this: ~/fbsource/third-party/cuda/cuda_12.8.0/x64-linux/bin/cuobjdump -ptx ../buck-out/v2/gen/fbcode/028bde1acfaba823/scripts/mjk/ai_training/cuda_benchmarks/__repeat_cuda__/libscripts_mjk_ai_training_cuda_benchmarks_repeat_cuda.so ``` ## with printf This is the version of the code that appears in this diff: https://gist.github.com/mjkatmeta/5d18d48282d46b2240d946b335052b9a ## without printf I recompiled, replacing `CUDA_KERNEL_ASSERT_PRINTF(...)` in Repeat.cu with: ``` CUDA_KERNEL_ASSERT(result_size == cumsum_ptr[size - 1]); ``` https://gist.github.com/mjkatmeta/480df4b3a122e7b326554dd15ebb7c9d (Both of these are annotated with `// CHAR ARRAY:` comments to make the string constants easier to read.) Test Plan: Running this minimal test case: ``` import torch def main(): x = torch.ones(10, dtype=torch.int64, device="cuda:0") torch.repeat_interleave(x, x, output_size=0) ``` Now we see the new message (from printf) alongside the assert failure: ``` $ buck2 run fbcode//scripts/darshanr/repeat_interleave_errors:repeat_interleave_errors [...] [CUDA_KERNEL_ASSERT] fbcode/caffe2/aten/src/ATen/native/cuda/Repeat.cu:25: compute_cuda_kernel: block: [0,0,0], thread: [31,0,0]: Assertion failed: `result_size == cumsum_ptr[size - 1]`: Invalid input! In `repeat_interleave`, the `output_size` argument (0) must be the same as the sum of the elements in the `repeats` tensor (10). fbcode/caffe2/aten/src/ATen/native/cuda/Repeat.cu:25: compute_cuda_kernel: block: [0,0,0], thread: [384,0,0] Assertion `result_size == cumsum_ptr[size - 1]` failed. [...[ ``` Rollback Plan: Reviewed By: mradmila Differential Revision: D79310684 Pull Request resolved: https://github.com/pytorch/pytorch/pull/160129 Approved by: https://github.com/ngimel --- aten/src/ATen/native/cuda/Repeat.cu | 9 +++---- torch/headeronly/macros/Macros.h | 42 +++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/aten/src/ATen/native/cuda/Repeat.cu b/aten/src/ATen/native/cuda/Repeat.cu index 1e2364ae50913..e131081af153d 100644 --- a/aten/src/ATen/native/cuda/Repeat.cu +++ b/aten/src/ATen/native/cuda/Repeat.cu @@ -17,12 +17,11 @@ __global__ static void compute_cuda_kernel( index_t* result_ptr, int64_t size, int64_t result_size) { - if (C10_UNLIKELY((result_size != cumsum_ptr[size - 1]))) { - printf("%s:%d:%s: block: [%d,%d,%d], thread: [%d,%d,%d] " + CUDA_KERNEL_ASSERT_PRINTF( + result_size == cumsum_ptr[size - 1], "Invalid input! 
In `repeat_interleave`, the `output_size` argument (%ld) must be the same as the sum of the elements in the `repeats` tensor (%ld).\n", - __FILE__, __LINE__, __func__,blockIdx.x, blockIdx.y, blockIdx.z, threadIdx.x, threadIdx.y, threadIdx.z, result_size, cumsum_ptr[size - 1 ]); - CUDA_KERNEL_ASSERT(result_size == cumsum_ptr[size - 1]) - } + result_size, + cumsum_ptr[size - 1]); int64_t idx = ((int64_t) blockIdx.x) * blockDim.x + threadIdx.x; int64_t stride = (blockDim.x * gridDim.x) / C10_WARP_SIZE; diff --git a/torch/headeronly/macros/Macros.h b/torch/headeronly/macros/Macros.h index 558edb175ae29..e340e7626a094 100644 --- a/torch/headeronly/macros/Macros.h +++ b/torch/headeronly/macros/Macros.h @@ -359,6 +359,7 @@ static inline int C10_WARP_SIZE_INTERNAL() { // Those platforms do not support assert() #define CUDA_KERNEL_ASSERT(cond) #define CUDA_KERNEL_ASSERT_MSG(cond, msg) +#define CUDA_KERNEL_ASSERT_PRINTF(cond, msg, ...) #define SYCL_KERNEL_ASSERT(cond) #elif defined(_MSC_VER) #if defined(NDEBUG) @@ -396,6 +397,26 @@ __host__ __device__ static_cast(__LINE__)), \ 0); \ } +#define CUDA_KERNEL_ASSERT_PRINTF(cond, msg, ...) \ + if (C10_UNLIKELY(!(cond))) { \ + (void)(printf( \ + "[CUDA_KERNEL_ASSERT] " __FILE__ ":" C10_STRINGIZE( \ + __LINE__) ": %s: block: [%d,%d,%d], thread: [%d,%d,%d]: " \ + "Assertion failed: `" #cond "`: " msg "\n", \ + __func__, \ + blockIdx.x, \ + blockIdx.y, \ + blockIdx.z, \ + threadIdx.x, \ + threadIdx.y, \ + threadIdx.z, \ + ##__VA_ARGS__)); \ + (void)(_wassert( \ + _CRT_WIDE(#cond), \ + _CRT_WIDE(__FILE__), \ + static_cast(__LINE__)), \ + 0); \ + } #define SYCL_KERNEL_ASSERT(cond) \ if (C10_UNLIKELY(!(cond))) { \ (void)(_wassert( \ @@ -455,6 +476,10 @@ __host__ __device__ if C10_UNLIKELY (!(cond)) { \ abort(); \ } +#define CUDA_KERNEL_ASSERT_PRINTF(cond, msg, ...) \ + if C10_UNLIKELY (!(cond)) { \ + abort(); \ + } #define SYCL_KERNEL_ASSERT(cond) \ if C10_UNLIKELY (!(cond)) { \ abort(); \ @@ -470,6 +495,23 @@ __host__ __device__ __assert_fail( \ msg, __FILE__, static_cast(__LINE__), __func__); \ } +#define CUDA_KERNEL_ASSERT_PRINTF(cond, msg, ...) \ + if (C10_UNLIKELY(!(cond))) { \ + printf( \ + "[CUDA_KERNEL_ASSERT] " __FILE__ ":" C10_STRINGIZE( \ + __LINE__) ": %s: block: [%d,%d,%d], thread: [%d,%d,%d]: " \ + "Assertion failed: `" #cond "`: " msg "\n", \ + __func__, \ + blockIdx.x, \ + blockIdx.y, \ + blockIdx.z, \ + threadIdx.x, \ + threadIdx.y, \ + threadIdx.z, \ + ##__VA_ARGS__); \ + __assert_fail( \ + #cond, __FILE__, static_cast(__LINE__), __func__); \ + } #define SYCL_KERNEL_ASSERT(cond) \ if (C10_UNLIKELY(!(cond))) { \ __assert_fail( \ From 05ee8114f818a95745c812c3cd7aa8e784e61a9a Mon Sep 17 00:00:00 2001 From: PaliC Date: Tue, 16 Sep 2025 00:37:06 +0000 Subject: [PATCH 271/693] [BE] Make PyObjectSlot use a global PyInterpreter (#162659) This pr gets rid of the pyobj_interpreter_ variable from PyObjectSlot and saves a word in the process Gonna ask for review from @huydhn as there are some changes to CI. 
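To make the size saving concrete, here is a small standalone sketch (not part of this patch; the types are stand-ins for c10::impl::PyInterpreter and CPython's PyObject) contrasting the old per-slot interpreter pointer with the new layout that relies on a single global accessor:

```cpp
#include <atomic>
#include <cstdio>

struct PyInterpreter;  // stand-in for c10::impl::PyInterpreter
struct PyObject;       // stand-in for CPython's PyObject

// Before: every PyObjectSlot carried its own atomic interpreter pointer.
struct SlotBefore {
  std::atomic<PyInterpreter*> pyobj_interpreter_{nullptr};
  PyObject* pyobj_{nullptr};
};

// After: the interpreter comes from one process-wide accessor
// (getGlobalPyInterpreter() in this patch), so the slot only keeps
// the PyObject pointer and shrinks by one word.
struct SlotAfter {
  PyObject* pyobj_{nullptr};
};

int main() {
  std::printf("per-slot size before: %zu bytes, after: %zu bytes\n",
              sizeof(SlotBefore), sizeof(SlotAfter));
  return 0;
}
```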
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162659 Approved by: https://github.com/albanD, https://github.com/huydhn --- .github/workflows/pull.yml | 2 + c10/core/TensorImpl.h | 2 +- c10/core/impl/PyInterpreterHooks.h | 7 ++-- c10/core/impl/PyObjectSlot.cpp | 10 ++--- c10/core/impl/PyObjectSlot.h | 50 +++++++------------------ functorch/csrc/dim/dim.cpp | 3 +- torch/csrc/Module.cpp | 6 +-- torch/csrc/PyInterpreter.cpp | 6 +-- torch/csrc/Storage.cpp | 14 +++---- torch/csrc/autograd/python_variable.cpp | 17 ++++----- 10 files changed, 43 insertions(+), 74 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 3f13fbf276882..ff6e9ed107117 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -127,6 +127,8 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: + # More memory is needed to build with asan + runner: linux.2xlarge.memory runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-clang18-asan docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan diff --git a/c10/core/TensorImpl.h b/c10/core/TensorImpl.h index 972181327b1f6..98867da60a7ff 100644 --- a/c10/core/TensorImpl.h +++ b/c10/core/TensorImpl.h @@ -3269,7 +3269,7 @@ class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl { is_le(); is_le(); are_equal(); - are_equal(); + are_equal(); are_equal(); are_equal(); are_equal(); diff --git a/c10/core/impl/PyInterpreterHooks.h b/c10/core/impl/PyInterpreterHooks.h index 32a17ad9a8a0c..4fe025d2e778b 100644 --- a/c10/core/impl/PyInterpreterHooks.h +++ b/c10/core/impl/PyInterpreterHooks.h @@ -13,11 +13,10 @@ struct C10_API PyInterpreterHooksInterface { // Get the PyInterpreter instance // Stub implementation throws error when Python is not available + // We return nullptr rather than throwing an error since there are bits of c10 + // that expect an empty PyObjectSlot when python is not available. virtual PyInterpreter* getPyInterpreter() const { - TORCH_CHECK( - false, - "PyTorch was compiled without Python support. 
" - "Cannot access Python interpreter from C++."); + return nullptr; } }; diff --git a/c10/core/impl/PyObjectSlot.cpp b/c10/core/impl/PyObjectSlot.cpp index 0f1bfb2110747..7476ac1d4c39b 100644 --- a/c10/core/impl/PyObjectSlot.cpp +++ b/c10/core/impl/PyObjectSlot.cpp @@ -2,7 +2,7 @@ namespace c10::impl { -PyObjectSlot::PyObjectSlot() : pyobj_interpreter_(nullptr), pyobj_(nullptr) {} +PyObjectSlot::PyObjectSlot() : pyobj_(nullptr) {} PyObjectSlot::~PyObjectSlot() { maybe_destroy_pyobj(); @@ -10,9 +10,9 @@ PyObjectSlot::~PyObjectSlot() { void PyObjectSlot::maybe_destroy_pyobj() { if (owns_pyobj()) { - TORCH_INTERNAL_ASSERT(pyobj_interpreter_ != nullptr); + TORCH_INTERNAL_ASSERT(getGlobalPyInterpreter() != nullptr); TORCH_INTERNAL_ASSERT(pyobj_ != nullptr); - (*pyobj_interpreter_.load(std::memory_order_acquire)) + (*getGlobalPyInterpreter()) ->decref(_unchecked_untagged_pyobj(), /*has_pyobj_slot*/ true); // NB: this destructor can only be entered when there are no // references to this C++ object (obviously), NOR any references @@ -25,7 +25,7 @@ void PyObjectSlot::maybe_destroy_pyobj() { } PyInterpreter* PyObjectSlot::pyobj_interpreter() { - return pyobj_interpreter_.load(std::memory_order_acquire); + return getGlobalPyInterpreter(); } PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const { @@ -35,7 +35,7 @@ PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const { } PyInterpreter& PyObjectSlot::load_pyobj_interpreter() const { - auto interpreter = pyobj_interpreter_.load(std::memory_order_acquire); + auto interpreter = getGlobalPyInterpreter(); if (interpreter) { return *interpreter; } diff --git a/c10/core/impl/PyObjectSlot.h b/c10/core/impl/PyObjectSlot.h index 58b2490eba001..e7d78f8360c33 100644 --- a/c10/core/impl/PyObjectSlot.h +++ b/c10/core/impl/PyObjectSlot.h @@ -6,10 +6,17 @@ #include #include -#include - namespace c10::impl { +// Function pointer type for getting the global interpreter +using GetPyInterpreterFn = PyInterpreter* (*)(); + +// Global function pointer (set by csrc initialization) +C10_API extern GetPyInterpreterFn g_get_pyinterpreter_fn; + +// Helper function to get the global interpreter +C10_API PyInterpreter* getGlobalPyInterpreter(); + struct C10_API PyObjectSlot { public: PyObjectSlot(); @@ -26,8 +33,6 @@ struct C10_API PyObjectSlot { // NB: THIS FUNCTION CAN RAISE AN EXCEPTION. Make sure to clean up after // PyObject if necessary! void init_pyobj(PyObject* pyobj) { - pyobj_interpreter_.store( - getGlobalPyInterpreter(), std::memory_order_relaxed); pyobj_ = pyobj; } @@ -55,18 +60,15 @@ struct C10_API PyObjectSlot { // @todo alban: I'm not too sure what's going on here, we can probably delete // it but it's worthwhile making sure - std::optional check_pyobj(bool ignore_hermetic_tls = false) const { - impl::PyInterpreter* interpreter = - pyobj_interpreter_.load(std::memory_order_acquire); - if (interpreter == nullptr) { + std::optional check_pyobj() const { + impl::PyInterpreter* interpreter = getGlobalPyInterpreter(); + if (interpreter == nullptr || pyobj_ == nullptr) { return std::nullopt; } - - if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) { + if (c10::impl::HermeticPyObjectTLS::get_state()) { return std::nullopt; - } else { - return _unchecked_untagged_pyobj(); } + return _unchecked_untagged_pyobj(); } PyInterpreter& load_pyobj_interpreter() const; @@ -76,30 +78,6 @@ struct C10_API PyObjectSlot { void set_owns_pyobj(bool b); private: - // This field contains the interpreter tag for this object. 
See - // Note [Python interpreter tag] for general context - // - // Note [Memory ordering on Python interpreter tag] - // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - // What memory_order do we need when accessing this atomic? We don't - // need a single total modification order (as provided by - // memory_order_seq_cst) as pyobj_interpreter_ is monotonic: it can only - // transition from -1 to some positive integer and never changes afterwards. - // Because there is only one modification, it trivially already has a total - // modification order (e.g., we don't need fences or locked instructions on - // x86) - // - // In fact, one could make a reasonable argument that relaxed reads are OK, - // due to the presence of external locking (GIL) to ensure that interactions - // with other data structures are still correctly synchronized, so that - // we fall in the "Single-Location Data Structures" case as described in - // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf - // However, on x86, it doesn't matter if I use acquire or relaxed on the load - // as I get the same assembly in both cases. So I just use the more - // conservative acquire (which will impede compiler optimizations but I don't - // care) - std::atomic pyobj_interpreter_; - // This field contains a reference to a PyObject representing this Tensor. // If pyobj is nullptr, when we transfer Tensor to Python, we allocate a new // PyObject for it and set this field. This field does not have to be diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp index 8f1e561e2051b..5258ba52f99c5 100644 --- a/functorch/csrc/dim/dim.cpp +++ b/functorch/csrc/dim/dim.cpp @@ -1187,8 +1187,7 @@ int64_t _Tensor_ndim(mpy::handle h) { mpy::handle handle_from_tensor(Arena& A, TensorRef t) { // fast case: tensor is live in python std::optional mb_obj = - t->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false); + t->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); if (mb_obj.has_value() && !t->unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()) { return *mb_obj; diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index ac2b03d2651cc..3a3e8bfef0478 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -403,11 +403,9 @@ static PyObject* THPModule_swap_tensor_impl(PyObject* _unused, PyObject* args) { // The TensorImpls contain PyObjectSlots that have a reference to the PyObject // associated with the TensorImpl. Swap this field as well. 
std::optional mb_obj_a = - a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false); + a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); std::optional mb_obj_b = - b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false); + b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); TORCH_INTERNAL_ASSERT( mb_obj_a.has_value() && mb_obj_b.has_value(), "Both tensors should have PyObjects tagged by the current python interpreter"); diff --git a/torch/csrc/PyInterpreter.cpp b/torch/csrc/PyInterpreter.cpp index e6016a7721e8b..993f8b8216a6b 100644 --- a/torch/csrc/PyInterpreter.cpp +++ b/torch/csrc/PyInterpreter.cpp @@ -614,8 +614,7 @@ static void set_tensor_attr_with_capsule( const c10::TensorImpl* tensor, py::capsule& capsule, const char* attr_name) { - std::optional mb_obj = tensor->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false); + std::optional mb_obj = tensor->pyobj_slot()->check_pyobj(); TORCH_CHECK( mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value"); auto obj = mb_obj.value(); @@ -642,8 +641,7 @@ static c10::ArrayRef get_set_cached_attr( const c10::TensorImpl* tensor, const char* base_attr_name, const py::object& obj) { - std::optional mb_obj = - tensor->pyobj_slot()->check_pyobj(getPyInterpreter()); + std::optional mb_obj = tensor->pyobj_slot()->check_pyobj(); TORCH_CHECK( mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value"); auto tensor_obj = mb_obj.value(); diff --git a/torch/csrc/Storage.cpp b/torch/csrc/Storage.cpp index 08112b41aaaed..f6638bbd10c19 100644 --- a/torch/csrc/Storage.cpp +++ b/torch/csrc/Storage.cpp @@ -41,8 +41,8 @@ PyObject* THPStorage_NewWithStorage( "Creating a Storage subclass from a class that does not inherit from ", "Storage is not possible. Make sure your class inherits from Storage."); - auto maybe_pyobj = _storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false); + auto maybe_pyobj = + _storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(); if (maybe_pyobj.has_value() && maybe_pyobj.value()) { TORCH_CHECK( allow_preexisting_pyobj, @@ -93,8 +93,7 @@ PyObject* THPStorage_Wrap(c10::Storage storage) { } c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot(); - std::optional maybe_pyobj = pyobj_slot->check_pyobj( - /*ignore_hermetic_tls=*/false); + std::optional maybe_pyobj = pyobj_slot->check_pyobj(); if (maybe_pyobj.has_value()) { auto obj = *maybe_pyobj; if (obj) { @@ -127,8 +126,8 @@ static bool THPStorage_isPreservable(THPStorage* self) { return false; } - if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/true) != (PyObject*)self) { + if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj() != + (PyObject*)self) { return false; } if (storage.use_count() <= 1) { @@ -145,8 +144,7 @@ static bool THPStorage_tryPreserve(THPStorage* self) { const auto& storage = THPStorage_Unpack(self); c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl(); - auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/true); + auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj(); // NOTE: It is possible to just set the PyObjectSlot here, but the point is // that we should have already set PyObjectSlot when the storage PyObject // was created. 
diff --git a/torch/csrc/autograd/python_variable.cpp b/torch/csrc/autograd/python_variable.cpp index 7ec4bf28e1604..bbda3adc2b275 100644 --- a/torch/csrc/autograd/python_variable.cpp +++ b/torch/csrc/autograd/python_variable.cpp @@ -265,8 +265,7 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) { } std::optional mb_obj = - var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false); + var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); if (mb_obj.has_value()) { auto obj = *mb_obj; if (obj) { @@ -329,8 +328,8 @@ static bool isResurrectable(THPVariable* self) { return false; } // Check if this is hermetic. If it is, no resurrection. - if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false) != (PyObject*)self) { + if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj() != + (PyObject*)self) { return false; } return true; @@ -355,8 +354,7 @@ static bool THPVariable_tryResurrect(THPVariable* self) { !tensor.unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()); c10::TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl(); - auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false); + auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj(); TORCH_INTERNAL_ASSERT( maybe_pyobj.has_value(), @@ -1934,8 +1932,8 @@ static int THPVariable_subclass_clear(THPVariable* self) { // because Tensor asked us to (it's already destructing). if (!self->cdata.unsafeIsBorrowed() && - tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false) == (PyObject*)self) { + tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj() == + (PyObject*)self) { // TODO: empirically, on OS X this assert appears to be untrue // In test_py_tensors_multi_async_call - ProcessGroupRpcTestWithSpawn // distributed/rpc/test_process_group_agent.py @@ -2121,8 +2119,7 @@ static PyObject* THPVariable_NewWithVar( // This function overwrite the Tensor's pyobj field without extra checks // Make sure it is not set otherwise we would leak memory - auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( - /*ignore_hermetic_tls=*/false); + auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); // Under some circumstances, we may attempt to create a new Python // object for a variable that already has a Python object. 
The most common From 505458db803e1ffabac08a2fc150b566d3ea3a57 Mon Sep 17 00:00:00 2001 From: Simon Fan Date: Mon, 15 Sep 2025 11:22:48 -0700 Subject: [PATCH 272/693] [dynamo][hop] Introduce Local Map HOP (#161458) Can't actually deploy it because of: https://github.com/pytorch/pytorch/issues/161456 Pull Request resolved: https://github.com/pytorch/pytorch/pull/161458 Approved by: https://github.com/ydwu4 --- test/dynamo/test_higher_order_ops.py | 1 + test/higher_order_ops/test_local_map.py | 203 ++++++++++++ test/inductor/test_compiled_autograd.py | 2 +- torch/_dynamo/variables/builder.py | 7 +- torch/_dynamo/variables/higher_order_ops.py | 111 +++++++ torch/_higher_order_ops/__init__.py | 2 + torch/_higher_order_ops/local_map.py | 327 ++++++++++++++++++++ torch/distributed/tensor/_ops/_view_ops.py | 4 +- torch/testing/_internal/hop_db.py | 38 +++ 9 files changed, 692 insertions(+), 3 deletions(-) create mode 100644 test/higher_order_ops/test_local_map.py create mode 100644 torch/_higher_order_ops/local_map.py diff --git a/test/dynamo/test_higher_order_ops.py b/test/dynamo/test_higher_order_ops.py index 9f093d4dc0cea..78943b41bc262 100644 --- a/test/dynamo/test_higher_order_ops.py +++ b/test/dynamo/test_higher_order_ops.py @@ -7197,6 +7197,7 @@ def false_branch(x): # aot_eager "map", # assert type(args[1].realize()) is TensorVariable "scan", # scan is not an OpOverload + "local_map_hop", # can't retrace # inductor "while_loop", # LoweringException: AssertionError "flex_attention", # LoweringException: AssertionError diff --git a/test/higher_order_ops/test_local_map.py b/test/higher_order_ops/test_local_map.py new file mode 100644 index 0000000000000..46ecacc2b330c --- /dev/null +++ b/test/higher_order_ops/test_local_map.py @@ -0,0 +1,203 @@ +# Owner(s): ["module: higher order operators"] +# flake8: noqa: B950 + + +import unittest + +import torch +import torch._dynamo +import torch._functorch +import torch._inductor +import torch._inductor.decomposition +import torch.nn.functional as F +from torch import nn +from torch._dynamo.variables.higher_order_ops import LocalMapWrappedHigherOrderVariable + + +if torch.distributed.is_available(): + from torch.distributed._tensor.experimental import local_map + from torch.distributed.tensor.placement_types import Replicate, Shard + +from torch.testing._internal.common_utils import run_tests, TEST_WITH_CROSSREF, TestCase +from torch.testing._internal.triton_utils import requires_cuda_and_triton + + +nested_compile_region = torch.compiler.nested_compile_region + + +class MyTransform(torch.autograd.Function): + @staticmethod + def forward(ctx, x): + return x + 100 + + @staticmethod + def backward(ctx, grad): + return grad + 100 + + +def context_parallel_attention(query, key, value): + out = F.scaled_dot_product_attention( + query=query, key=key, value=value, is_causal=False + ) + return out + + +def create_model(attention_fn, nheads, dim1, dim2): + class LocalMapTransformerBlock(nn.Module): + def __init__(self, nheads, dim1, dim2): + super().__init__() + self.nheads = nheads + bias = False + self.wq = nn.Linear(dim1, dim1, bias=bias) + self.wk = nn.Linear(dim1, dim1, bias=bias) + self.wv = nn.Linear(dim1, dim1, bias=bias) + self.wo = nn.Linear(dim1, dim1, bias=bias) + self.w1 = nn.Linear(dim1, dim2, bias=bias) + self.w2 = nn.Linear(dim2, dim1, bias=bias) + + def forward(self, x): + q = self.wq(x) + k = self.wk(x) + v = self.wv(x) + + q = q.unflatten(-1, (self.nheads, -1)).permute(0, 2, 1, 3) + k = k.unflatten(-1, (self.nheads, -1)).permute(0, 2, 1, 3) + v 
= v.unflatten(-1, (self.nheads, -1)).permute(0, 2, 1, 3) + + o = attention_fn(q, k, v) + o = o.permute(0, 2, 1, 3).flatten(-2) + + o = self.wo(o) + + o0 = o + x + + o = self.w1(o0) + o = torch.nn.functional.relu(o) + o = self.w2(o) + + o = o0 + o + return o + + return LocalMapTransformerBlock(nheads, dim1, dim2) + + +class TestLocalMap(TestCase): + @requires_cuda_and_triton + @unittest.skipIf( + not torch.distributed.is_available(), "Torch distributed not available." + ) + def test_simple(self): + @local_map( + out_placements=((Shard(0), Shard(1), Shard(2)),), + in_placements=( + (Shard(0), Shard(1), Shard(2)), # query + (Shard(0), Shard(1), Replicate()), # key + (Shard(0), Shard(1), Replicate()), # value + ), + redistribute_inputs=True, + in_grad_placements=None, + device_mesh=None, + ) + def cp_decorated(query, key, value): + return context_parallel_attention(query, key, value) + + cp_function = local_map( + context_parallel_attention, + out_placements=(Shard(0), Shard(1), Shard(2)), + in_placements=( + (Shard(0), Shard(1), Shard(2)), # query + (Shard(0), Shard(1), Replicate()), # key + (Shard(0), Shard(1), Replicate()), # value + ), + redistribute_inputs=True, + in_grad_placements=None, + device_mesh=None, + ) + bs = 8 * 1 + dim1 = 96 + dim2 = dim1 * 4 + nheads = 16 + seq_len = 16 + + from torch._dynamo.testing import EagerAndRecordGraphs, normalize_gm + + backend = EagerAndRecordGraphs() + + model = create_model(cp_decorated, nheads, dim1, dim2).cuda() + inputs = (torch.randn(bs, seq_len, dim1, requires_grad=True).cuda(),) + with LocalMapWrappedHigherOrderVariable.enable(): + out = torch.compile(model, backend=backend)(*inputs) + out.sum().backward() + + model = create_model(cp_function, nheads, dim1, dim2).cuda() + inputs = (torch.randn(bs, seq_len, dim1, requires_grad=True).cuda(),) + with LocalMapWrappedHigherOrderVariable.enable(): + out = torch.compile(model, backend=backend)(*inputs) + out.sum().backward() + + if not TEST_WITH_CROSSREF: + self.assertEqual(len(backend.graphs), 2) + # should see local_map_hop in both + self.assertExpectedInline( + normalize_gm(backend.graphs[0].print_readable(print_output=False)), + """\ +class GraphModule(torch.nn.Module): + def forward(self, L_self_modules_wq_parameters_weight_: "f32[96, 96]", L_x_: "f32[8, 16, 96]", L_self_modules_wk_parameters_weight_: "f32[96, 96]", L_self_modules_wv_parameters_weight_: "f32[96, 96]", L_self_modules_wo_parameters_weight_: "f32[96, 96]", L_self_modules_w1_parameters_weight_: "f32[384, 96]", L_self_modules_w2_parameters_weight_: "f32[96, 384]"): + l_self_modules_wq_parameters_weight_ = L_self_modules_wq_parameters_weight_ + l_x_ = L_x_ + l_self_modules_wk_parameters_weight_ = L_self_modules_wk_parameters_weight_ + l_self_modules_wv_parameters_weight_ = L_self_modules_wv_parameters_weight_ + l_self_modules_wo_parameters_weight_ = L_self_modules_wo_parameters_weight_ + l_self_modules_w1_parameters_weight_ = L_self_modules_w1_parameters_weight_ + l_self_modules_w2_parameters_weight_ = L_self_modules_w2_parameters_weight_ + + q: "f32[8, 16, 96]" = torch._C._nn.linear(l_x_, l_self_modules_wq_parameters_weight_, None); l_self_modules_wq_parameters_weight_ = None + + k: "f32[8, 16, 96]" = torch._C._nn.linear(l_x_, l_self_modules_wk_parameters_weight_, None); l_self_modules_wk_parameters_weight_ = None + + v: "f32[8, 16, 96]" = torch._C._nn.linear(l_x_, l_self_modules_wv_parameters_weight_, None); l_self_modules_wv_parameters_weight_ = None + + unflatten: "f32[8, 16, 16, 6]" = q.unflatten(-1, (16, -1)); q = None + q_1: 
"f32[8, 16, 16, 6]" = unflatten.permute(0, 2, 1, 3); unflatten = None + + unflatten_1: "f32[8, 16, 16, 6]" = k.unflatten(-1, (16, -1)); k = None + k_1: "f32[8, 16, 16, 6]" = unflatten_1.permute(0, 2, 1, 3); unflatten_1 = None + + unflatten_2: "f32[8, 16, 16, 6]" = v.unflatten(-1, (16, -1)); v = None + v_1: "f32[8, 16, 16, 6]" = unflatten_2.permute(0, 2, 1, 3); unflatten_2 = None + + subgraph_0 = self.subgraph_0 + local_map_hop = torch.ops.higher_order.local_map_hop(subgraph_0, q_1, k_1, v_1); subgraph_0 = q_1 = k_1 = v_1 = None + o: "f32[8, 16, 16, 6]" = local_map_hop[0]; local_map_hop = None + + permute_3: "f32[8, 16, 16, 6]" = o.permute(0, 2, 1, 3); o = None + o_1: "f32[8, 16, 96]" = permute_3.flatten(-2); permute_3 = None + + o_2: "f32[8, 16, 96]" = torch._C._nn.linear(o_1, l_self_modules_wo_parameters_weight_, None); o_1 = l_self_modules_wo_parameters_weight_ = None + + o0: "f32[8, 16, 96]" = o_2 + l_x_; o_2 = l_x_ = None + + o_3: "f32[8, 16, 384]" = torch._C._nn.linear(o0, l_self_modules_w1_parameters_weight_, None); l_self_modules_w1_parameters_weight_ = None + + o_4: "f32[8, 16, 384]" = torch.nn.functional.relu(o_3); o_3 = None + + o_5: "f32[8, 16, 96]" = torch._C._nn.linear(o_4, l_self_modules_w2_parameters_weight_, None); o_4 = l_self_modules_w2_parameters_weight_ = None + + o_6: "f32[8, 16, 96]" = o0 + o_5; o0 = o_5 = None + return (o_6,) + + class subgraph_0(torch.nn.Module): + def forward(self, q_1: "f32[8, 16, 16, 6]", k_1: "f32[8, 16, 16, 6]", v_1: "f32[8, 16, 16, 6]"): + out: "f32[8, 16, 16, 6]" = torch._C._nn.scaled_dot_product_attention(query = q_1, key = k_1, value = v_1, is_causal = False); q_1 = k_1 = v_1 = None + return (out,) +""", + ) + + self.assertEqual( + normalize_gm(backend.graphs[0].print_readable(print_output=False)), + normalize_gm(backend.graphs[1].print_readable(print_output=False)), + ) + + +if __name__ == "__main__": + run_tests() diff --git a/test/inductor/test_compiled_autograd.py b/test/inductor/test_compiled_autograd.py index 6014a6e698607..e0cd8b99a6b3d 100644 --- a/test/inductor/test_compiled_autograd.py +++ b/test/inductor/test_compiled_autograd.py @@ -5354,7 +5354,7 @@ def wrap_test_class(orig_cls): test_dtensor.TestDTensorCompile ) -xfail_hops = {} +xfail_hops = {"local_map_hop"} class TestCompiledAutogradOpInfo(TestCase): diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py index 20b88759ef324..660042b33b875 100644 --- a/torch/_dynamo/variables/builder.py +++ b/torch/_dynamo/variables/builder.py @@ -206,7 +206,10 @@ UserMethodVariable, WrapperUserFunctionVariable, ) -from .higher_order_ops import TorchHigherOrderOperatorVariable +from .higher_order_ops import ( + LocalMapWrappedHigherOrderVariable, + TorchHigherOrderOperatorVariable, +) from .iter import ItertoolsVariable from .lazy import LazyVariableTracker from .lists import ( @@ -850,6 +853,8 @@ def build_key_value(i, k, v): return build_checkpoint_variable(source=self.source) elif is_invoke_subgraph(value): return build_invoke_subgraph_variable(source=self.source) + elif LocalMapWrappedHigherOrderVariable.should_wrap_in_hop(value): + return LocalMapWrappedHigherOrderVariable.build(source=self.source) elif isinstance(value, functools.partial): func_src = AttrSource(self.get_source(), "func") func_obj = VariableBuilder(self.tx, func_src)(value.func) diff --git a/torch/_dynamo/variables/higher_order_ops.py b/torch/_dynamo/variables/higher_order_ops.py index 5ac883c7d3932..7c20578b51bf7 100644 --- a/torch/_dynamo/variables/higher_order_ops.py +++ 
b/torch/_dynamo/variables/higher_order_ops.py @@ -3383,6 +3383,7 @@ def _call_function( lambda a: a.node.meta["example_value"], body_r.as_proxy(), ) + p_kwargs = {key: value.as_proxy() for key, value in kwargs.items()} return _call_function_and_unflatten_output( tx, self.value, p_args, p_kwargs, flat_example_value, treespec @@ -3497,6 +3498,115 @@ def _call_function( ) +class LocalMapWrappedHigherOrderVariable(WrapHigherOrderVariable): + supports_input_mutation = False + supports_aliasing = False + + # Subclasses aren't supported by speculate_subgraph yet + # So this HOP is only usable with plain tensors + _enabled = False + + @classmethod + @contextlib.contextmanager + def enable(cls): + """Context manager to temporarily enable local map wrapping. + Will be removed when speculate_subgraph supports subclass inputs: + https://github.com/pytorch/pytorch/issues/161456. + + Usage: + with LocalMapWrappedHigherOrderVariable.enable_wrapping(): + # Code where should_wrap_in_hop will return True + pass + """ + old_value = cls._enabled + cls._enabled = True + try: + yield + finally: + cls._enabled = old_value + + @classmethod + def should_wrap_in_hop(cls, value): + if not torch.distributed.is_available(): + return False + + from torch.distributed.tensor.experimental._func_map import _local_map_wrapped + + # check is important to avoid subclass dispatch + if type(value) != type(_local_map_wrapped): + return False + + return value == _local_map_wrapped and cls._enabled + + @staticmethod + def build(**options): + return TorchHigherOrderOperatorVariable.make( + torch._higher_order_ops.local_map_hop, + **options, + ) + + def python_type(self): + return type(self.value) + + def _call_function( + self, + tx: "InstructionTranslator", + args: "list[VariableTracker]", + kwargs: "dict[str, VariableTracker]", + ) -> "VariableTracker": + """ + Goal of this function is to rewrite local_map usage as a HOP: + local_map(func, ...) -> local_map_hop(gm, ...) 
+ """ + + ( + user_func, + out_placements, + in_placements, + in_grad_placements, + device_mesh, + redistribute_inputs, + *user_args, + ) = args + + ( + p_args, + p_kwargs, + example_value, + body_r, + treespec, + body_gmod, + body_name, + ) = self.create_wrapped_node( + tx, user_func, user_args, kwargs, self.value._name, subgraph_name="subgraph" + ) + + # Treat as const, so we don't have to deal with Placement types in fx IR + # Guarded with EQUALS_MATCH on local_map call's arguments + body_gmod.meta["local_map_kwargs"] = { + "out_placements": out_placements.value, + "in_placements": in_placements.value, + "redistribute_inputs": redistribute_inputs.value, + "in_grad_placements": in_grad_placements.value, + "device_mesh": device_mesh.value, + } + + assert len(p_kwargs) == 0 + + flat_example_value = pytree.tree_map_only( + torch.fx.Proxy, + lambda a: a.node.meta["example_value"], + body_r.as_proxy(), + ) + + p_kwargs = {key: value.as_proxy() for key, value in kwargs.items()} + out = _call_function_and_unflatten_output( + tx, self.value, p_args, p_kwargs, flat_example_value, treespec + ) + + return out + + # Map operator names to their corresponding variable for fast TorchHigherOrderOperatorVariable.make() _hop_name_to_variable_class = { "cond": CondHigherOrderVariable, @@ -3525,4 +3635,5 @@ def _call_function( "auto_functionalized_v2": AutoFunctionalizeHigherOrderVariable, "invoke_subgraph": InvokeSubgraphHigherOrderVariable, "custom_function_call": CustomFunctionHigherOrderOperatorVariable, + "local_map_hop": LocalMapWrappedHigherOrderVariable, } diff --git a/torch/_higher_order_ops/__init__.py b/torch/_higher_order_ops/__init__.py index e809c729dc424..516d58bdf314e 100644 --- a/torch/_higher_order_ops/__init__.py +++ b/torch/_higher_order_ops/__init__.py @@ -21,6 +21,7 @@ from torch._higher_order_ops.foreach_map import _foreach_map, foreach_map from torch._higher_order_ops.hints_wrap import hints_wrapper from torch._higher_order_ops.invoke_subgraph import invoke_subgraph +from torch._higher_order_ops.local_map import local_map_hop from torch._higher_order_ops.map import map from torch._higher_order_ops.out_dtype import out_dtype from torch._higher_order_ops.run_const_graph import run_const_graph @@ -73,4 +74,5 @@ "aoti_call_delegate", "map", "while_loop_stack_output", + "local_map_hop", ] diff --git a/torch/_higher_order_ops/local_map.py b/torch/_higher_order_ops/local_map.py new file mode 100644 index 0000000000000..22cb2af50f1f7 --- /dev/null +++ b/torch/_higher_order_ops/local_map.py @@ -0,0 +1,327 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. +# +# This source code is licensed under the BSD license found in the +# LICENSE file in the root directory of this source tree. 
+ +# NOTE: this file may be removed once we move to a dynamo frontend + +import functools +from collections.abc import Generator +from contextlib import contextmanager +from typing import Any, Callable, Optional + +import torch +import torch.utils._pytree as pytree +from torch._C import DispatchKey +from torch._higher_order_ops.utils import ( + clone_outputs_aliasing_inputs, + save_tensors_and_symints_for_backward, + saved_tensors_and_symints, +) +from torch._ops import HigherOrderOperator +from torch._subclasses.fake_tensor import FakeTensorMode +from torch.fx import GraphModule +from torch.fx.experimental.proxy_tensor import ProxyTorchDispatchMode, track_tensor_tree + + +# Proxy the HOP instead of inlining into it +_DEFER_INLINING = False + + +@contextmanager +def defer_inlining() -> Generator[None, None, None]: + global _DEFER_INLINING + prior = _DEFER_INLINING + try: + _DEFER_INLINING = True + yield + finally: + _DEFER_INLINING = prior + + +class LocalMapHOP(HigherOrderOperator): + def __init__(self) -> None: + super().__init__("local_map_hop") + + def __call__(self, fw_gm: GraphModule, *args: Any, **kwargs: Any) -> Any: + return super().__call__(fw_gm, *args, **kwargs) + + +local_map_hop = LocalMapHOP() + + +def create_hop_fw_bw( + fw_gm: GraphModule, + *_args: Any, +) -> tuple[GraphModule, GraphModule, int, int, set[int]]: + """ + Traces a joint, applies passes and partitions it + """ + # Keeping these imports here + # Avoid circular dependencies once we upstream with dynamo frontend + from torch._dispatch.python import suspend_functionalization + from torch._functorch.aot_autograd import AOTConfig, create_joint + from torch._guards import detect_fake_mode + from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode + from torch._subclasses.functional_tensor import disable_functional_mode + from torch.fx.experimental.proxy_tensor import disable_proxy_modes_tracing, make_fx + + dummy_aot_config = AOTConfig( + fw_compiler=None, # type: ignore[arg-type] + bw_compiler=None, # type: ignore[arg-type] + partition_fn=None, # type: ignore[arg-type] + decompositions={}, + num_params_buffers=0, + aot_id=0, + keep_inference_input_mutations=False, + ) + + with suspend_functionalization(), disable_functional_mode(): + with disable_proxy_modes_tracing(): + # create a tensor (fake) from a compiler wrapped FunctionalTensor + def _from_fun(t: Any) -> Any: + if isinstance(t, torch.Tensor): + return torch.empty_strided( + t.size(), + t.stride(), + device=t.device, + dtype=t.dtype, + requires_grad=t.requires_grad, + ) + return t + + # If someone runs this hop under the default compiler backend ("eager") + # Then this path will be run with the actual user inputs. We convert them + # to fake tensors in order to not perform any actual compute. 
+ + fake_mode = detect_fake_mode(_args) + if fake_mode is None: + fake_mode = FakeTensorMode(allow_non_fake_inputs=True) + + with fake_mode: + fw_inputs = pytree.tree_map(_from_fun, _args) + + assert all( + isinstance(t, (FakeTensor, int, torch.SymInt)) for t in fw_inputs + ), f"Unexpected element in {fw_inputs=}" + + example_grads = pytree.tree_map( + _from_fun, + fw_gm(*fw_inputs), + ) + if not isinstance(example_grads, (list, tuple)): + example_grads = [example_grads] + + num_fw_inputs = len(fw_inputs) + num_fw_outputs = len(example_grads) + + def joint_f( + *primals_and_tangents: list[torch.Tensor], + ) -> Any: + primals = primals_and_tangents[:num_fw_inputs] + tangents = primals_and_tangents[num_fw_inputs:] + + def prepare_fw_with_masks(fn: Callable[..., Any]) -> Callable[..., Any]: + def fw_with_masks(*args: Any) -> tuple[tuple[Any], list[bool]]: + fw_out = fn(*args) + assert isinstance(fw_out, tuple), ( + "Dynamo traced submodule should return tuple" + ) + return fw_out, [ + True + if isinstance(ret, torch.Tensor) and ret.requires_grad + else False + for ret in fw_out + ] + + return fw_with_masks + + fw_outs, grads = create_joint( + prepare_fw_with_masks(fw_gm), aot_config=dummy_aot_config + )(primals, tangents) + + maybe_clone = clone_outputs_aliasing_inputs(primals_and_tangents) + # put grads first to work with existing hop utils + return pytree.tree_map(maybe_clone, (*grads, *fw_outs)) + + filtered_grads_idx = set() + for i, example_grad in enumerate(example_grads): + # Filter out grads that are None or do not require_grad. + # The AOTAutograd utils we rely on force this assumption. + # We must also filter the runtime tangents too. + if example_grad is not None and ( + isinstance(example_grad, torch.Tensor) and example_grad.requires_grad + ): + filtered_grads_idx.add(i) + + primals_and_tangents = [ + *fw_inputs, + *[example_grads[i] for i in filtered_grads_idx], + ] + joint_hop_gm = make_fx(joint_f)(*primals_and_tangents) + + from torch._functorch._aot_autograd.graph_compile import prepare_for_partitioner + from torch._inductor.compile_fx import partition_fn + + # Match partitioner convention + prepped_joint_hop_gm = prepare_for_partitioner( + joint_hop_gm, num_fw_inputs, num_fw_outputs + ) + # Also runs joint passes + new_fw_gm, new_bw_gm = partition_fn( + prepped_joint_hop_gm, + [], + num_fwd_outputs=num_fw_outputs, + static_lifetime_input_indices=[], + ) + + # Propagate meta onto fw/bw graphs, later will be set on proxied nodes + local_map_kwargs = fw_gm.meta["local_map_kwargs"] # type: ignore[attr-defined] + + new_fw_gm.meta["local_map_kwargs"] = local_map_kwargs + new_bw_gm.meta["local_map_kwargs"] = {**local_map_kwargs} + # Okay because Autoparallel assumes same sharding between param and grads + new_bw_gm.meta["local_map_kwargs"]["in_placements"] = local_map_kwargs[ + "out_placements" + ] + new_bw_gm.meta["local_map_kwargs"]["out_placements"] = local_map_kwargs[ + "in_placements" + ] + + return new_fw_gm, new_bw_gm, num_fw_inputs, num_fw_outputs, filtered_grads_idx + + +class LocalMapAutogradOp(torch.autograd.Function): + @staticmethod + def forward( + ctx: Any, + fw_gm: GraphModule, + bw_gm: GraphModule, + num_fw_ins: int, + num_fw_outs: int, + filtered_grads_idx: set[int], + *args: Any, + **kwargs: Any, + ) -> tuple[Optional[torch.Tensor], ...]: + ctx.bw_gm = bw_gm + ctx.num_fw_ins = num_fw_ins + ctx.filtered_grads_idx = filtered_grads_idx + + with torch._C._AutoDispatchBelowAutograd(): + fw_outs_with_saved_activations = local_map_hop(fw_gm, *args, **kwargs) + + fw_outs 
= fw_outs_with_saved_activations[:num_fw_outs] + saved_activations = fw_outs_with_saved_activations[num_fw_outs:] + save_tensors_and_symints_for_backward(ctx, saved_activations) + + return fw_outs + + @staticmethod + def backward( + ctx: Any, *_grads: tuple[torch.Tensor] + ) -> tuple[Optional[torch.Tensor], ...]: + saved_activations = saved_tensors_and_symints(ctx) + with torch._C._AutoDispatchBelowAutograd(): + # Filter out grads that are None or do not require_grad. + # The AOTAutograd utils we rely on force this assumption. + grads = [_grads[i] for i in ctx.filtered_grads_idx] + grad_ins = local_map_hop(ctx.bw_gm, *saved_activations, *grads) + if len(grad_ins) != ctx.num_fw_ins: + raise RuntimeError( + f"Expected {ctx.num_fw_ins} grad_ins, got {len(grad_ins)}" + ) + return None, None, None, None, None, *grad_ins + + +@local_map_hop.py_impl(torch._C.DispatchKey.Autograd) +def autograd_key( + fw_gm: GraphModule, + *args: Any, + **kwargs: Any, +) -> Any: + if _DEFER_INLINING: + fw_gm, bw_gm, num_fw_ins, num_fw_outs, filtered_grads_idx = create_hop_fw_bw( + fw_gm, *args + ) + return LocalMapAutogradOp.apply( + fw_gm, bw_gm, num_fw_ins, num_fw_outs, filtered_grads_idx, *args, **kwargs + ) + + return fw_gm(*args, **kwargs) + + +@local_map_hop.py_functionalize_impl +def functional_mode_key( + ctx: Any, fw_gm: GraphModule, *args: Any, **kwargs: Any +) -> tuple[torch.Tensor]: + assert not kwargs + + unwrapped_inputs = ctx.unwrap_tensors(args) + with ctx.redispatch_to_next(): + out = local_map_hop(fw_gm, *unwrapped_inputs) + return ctx.wrap_tensors(out) + + +@local_map_hop.py_impl(FakeTensorMode) +def fake_mode_key( + mode: FakeTensorMode, + fw_gm: GraphModule, + *args: Any, + **kwargs: Any, +) -> tuple[torch.Tensor]: + with mode: + return fw_gm(*args, **kwargs) + + +def proxy_mode_key_common( + call_hop: Callable[..., Any], + proxy_mode: ProxyTorchDispatchMode, + gm: GraphModule, + *args: Any, + **kwargs: Any, +) -> tuple[torch.Tensor]: + assert proxy_mode is not None, ( + "Mode should always be enabled for python fallback key" + ) + assert len(kwargs) == 0 + + example_out = call_hop(*args, **kwargs) + proxy_args = pytree.tree_map(proxy_mode.tracer.unwrap_proxy, args) # type: ignore[union-attr] + + out_proxy = proxy_mode.tracer.create_proxy( + "call_function", call_hop, proxy_args, {} + ) + + # extract local_map args, post-dispatch operates on GraphModules + assert gm.meta["local_map_kwargs"] + local_map_kwargs = gm.meta["local_map_kwargs"] + + # propagate local_map args to the call_function node + out_proxy.node.meta["local_map_kwargs"] = local_map_kwargs + return track_tensor_tree( + example_out, out_proxy, constant=None, tracer=proxy_mode.tracer + ) + + +@local_map_hop.py_impl(ProxyTorchDispatchMode) +def proxy_mode_key( + proxy_mode: ProxyTorchDispatchMode, + fw_gm: GraphModule, + *args: Any, + **kwargs: Any, +) -> tuple[torch.Tensor]: + # TODO: get rid of this when we can install as a subgraph + def call_local_map(*_args: Any, **_kwargs: Any) -> Any: + return functools.partial(local_map_hop, fw_gm)(*_args, **_kwargs) + + return proxy_mode_key_common(call_local_map, proxy_mode, fw_gm, *args, **kwargs) + + +# Running HOP in eager with real tensors +@local_map_hop.py_impl(DispatchKey.CompositeExplicitAutograd) +def real_impl( + fw_gm: GraphModule, + *args: Any, + **kwargs: Any, +) -> tuple[torch.Tensor]: + return fw_gm(*args, **kwargs) diff --git a/torch/distributed/tensor/_ops/_view_ops.py b/torch/distributed/tensor/_ops/_view_ops.py index 62e8c68e9be9d..80a0491f694cc 100644 --- 
a/torch/distributed/tensor/_ops/_view_ops.py +++ b/torch/distributed/tensor/_ops/_view_ops.py @@ -490,7 +490,9 @@ def propagate_shape_and_sharding( - An output dimension that is a split of the input dimension can only be sharded if the leftmost split size is divisible by the mesh dimension """ - assert len(input_src_placements) == len(mesh_sizes) + assert len(input_src_placements) == len(mesh_sizes), ( + f"{input_src_placements} != {mesh_sizes}" + ) # for each input dim, for each mesh dim, provides a list of possible shardable dimensions mesh_ndim = len(mesh_sizes) shardable_dims: dict[int, list[bool]] = {} diff --git a/torch/testing/_internal/hop_db.py b/torch/testing/_internal/hop_db.py index 2a0883408892f..a927bbaa42f4d 100644 --- a/torch/testing/_internal/hop_db.py +++ b/torch/testing/_internal/hop_db.py @@ -212,6 +212,31 @@ def body_fn(iter_t, x): return torch._higher_order_ops.while_loop_stack_output(cond_fn, body_fn, (iter_t, x), tuple()) +def sample_inputs_local_map_hop(opinfo, device, dtype, requires_grad, **kwargs): + # TODO: once HOPs support DTensor inputs, we should also test DTensors + make_arg = functools.partial( + make_tensor, device=device, dtype=dtype, requires_grad=False + ) + yield SampleInput( + make_arg(2, 3, 4, low=0.1, high=2), + make_arg(2, 3, 4, low=0.1, high=2), + ) + + +def simple_local_map_hop(inp1, inp2): + def body_gm(inp1, inp2): + return inp1.cos() + inp2.sin() + gm = torch.fx.symbolic_trace(body_gm) + + assert torch.distributed.is_available() + from torch.distributed.tensor.placement_types import Replicate + gm.meta["local_map_kwargs"] = { + "in_placements": (Replicate(), Replicate(), Replicate()), + "out_placements": ((Replicate(), Replicate(), Replicate()),) + } + + return torch._higher_order_ops.local_map_hop(gm, inp1, inp2) + def sample_inputs_scan(opinfo, device, dtype, requires_grad, **kwargs): make_arg = functools.partial( make_tensor, device=device, dtype=dtype, requires_grad=requires_grad @@ -451,4 +476,17 @@ def fn(x): ), decorators=[onlyCUDA], ), + OpInfo( + name="local_map_hop", + variant_test_name="simple", + op=simple_local_map_hop, + sample_inputs_func=sample_inputs_local_map_hop, + dtypes=custom_types(torch.float16, torch.float32), + supports_out=False, + check_batched_grad=False, + check_batched_gradgrad=False, + check_batched_forward_grad=False, + check_inplace_batched_forward_grad=False, + decorators=[onlyCUDA, unittest.skipIf(not torch.distributed.is_available(), "requires distributed build")], + ), ] From 48dbd60df482fd884d211a800c5e82c648780a98 Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Mon, 15 Sep 2025 11:08:20 -0700 Subject: [PATCH 273/693] are_strides_like_channels_last_or_false (#162354) Note this could change suggest_memory_format behaviour for unbacked we used to return True for are_strides_like_channels_last sometimes even when results undecided now when its not decided we return False. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162354 Approved by: https://github.com/aorenste --- test/export/test_export.py | 4 +++- torch/_prims_common/__init__.py | 22 +++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/test/export/test_export.py b/test/export/test_export.py index 3ec52775cf097..3609f4fd52413 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -16182,9 +16182,11 @@ def forward( ) -> torch.Tensor: # x.sizes(): 1, 128, 16, 128 sp = start_pos.item() - torch._check_is_size(sp) + + # Checks needed for slicing. 
torch._check(sp >= 0) torch._check(sp <= 126) + key = cache[:, : sp + 1, :, :] # 1, sp+1, 16, 128 value = cache[:, : sp + 1, :, :] # 1, sp+1, 16, 128 query = query.transpose(1, 2) # (bs, n_local_heads, seqlen, head_dim) diff --git a/torch/_prims_common/__init__.py b/torch/_prims_common/__init__.py index 67238f05b23df..b4c3afce557b3 100644 --- a/torch/_prims_common/__init__.py +++ b/torch/_prims_common/__init__.py @@ -1975,10 +1975,15 @@ def check( # This combines is_channels_last_strides_2d and is_channels_last_strides_3d in # c10/core/MemoryFormat.h into one function -def are_strides_like_channels_last( +# May return False when input sizes are data-dependent and the property is not +# determined. +def are_strides_like_channels_last_or_false( shape: Sequence[int], strides: Sequence[int] ) -> bool: - from torch.fx.experimental.symbolic_shapes import guard_size_oblivious + from torch.fx.experimental.symbolic_shapes import ( + guard_or_true, + statically_known_true, + ) ndim = len(shape) @@ -1991,19 +1996,22 @@ def are_strides_like_channels_last( else: return False - if guard_size_oblivious(strides[1] == 0): + if guard_or_true(strides[1] == 0): return False min = 0 for d in dim_order: - if guard_size_oblivious(shape[d] == 0): + if guard_or_true(shape[d] == 0): return False - if guard_size_oblivious(strides[d] < min): + if guard_or_true(strides[d] < min): return False if d == 0 and min == strides[1]: return False min = strides[d] - if guard_size_oblivious(strides[d] > 1): + # Assume stride is not 1, the consequence is min could be larger than needed, + # which would result in returning False for this function but not vice versa, + # so it's ok. + if guard_or_true(strides[d] > 1): min *= shape[d] return True @@ -2012,7 +2020,7 @@ def suggest_memory_format(x: TensorLikeType) -> torch.memory_format: if x.layout != torch.strided: return torch.contiguous_format - if are_strides_like_channels_last(x.shape, x.stride()): + if are_strides_like_channels_last_or_false(x.shape, x.stride()): return torch.channels_last if x.ndim == 4 else torch.channels_last_3d return torch.contiguous_format From 61be0f1c11ef59ff8cf39138b594efe3672816c0 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 16 Sep 2025 01:46:59 +0000 Subject: [PATCH 274/693] Set the credential to upload vLLM nightly wheels on schedule and workflow_dispatch (#163018) The build is ok, but uploading is failing at the moment https://github.com/pytorch/pytorch/actions/runs/17734972779/job/50416387786 Pull Request resolved: https://github.com/pytorch/pytorch/pull/163018 Approved by: https://github.com/wdvr, https://github.com/malfet --- .github/workflows/build-vllm-wheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-vllm-wheel.yml b/.github/workflows/build-vllm-wheel.yml index bc22ce8d32a0a..24505ca01e9b3 100644 --- a/.github/workflows/build-vllm-wheel.yml +++ b/.github/workflows/build-vllm-wheel.yml @@ -183,7 +183,7 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Configure AWS credentials(PyTorch account) for main - if: ${{ github.event_name == 'push' && github.event.ref == 'refs/heads/main' }} + if: ${{ (github.event_name == 'push' && github.event.ref == 'refs/heads/main') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0 with: role-to-assume: arn:aws:iam::749337293305:role/gha_workflow_nightly_build_wheels From 
1aa41eccc2071535b8048ac5560628d96b7cdd84 Mon Sep 17 00:00:00 2001 From: CaoE Date: Tue, 16 Sep 2025 01:54:21 +0000 Subject: [PATCH 275/693] [Inductor][CPP] Reuse the pre-existing kernel for the same kernels (#158404) Reuse the pre-existing kernel to avoid defining redundant kernels. Inductor CPP will generate same kernels. For example: ``` # Example class Model(torch.nn.Module): def __init__(self, K, N): super().__init__() self.linear0 = torch.nn.Linear(K, N) self.linear1 = torch.nn.Linear(N, K) self.linear2 = torch.nn.Linear(K, N) def forward(self, input): out = self.linear0(input) out = self.linear1(out) out = self.linear2(out) return out ``` For the above example, linear2 is same as linear0, and Inductor CPP generates 2 same kernels: cpp_fused_addmm_0 and cpp_fused_addmm_2. ``` # Generated code: ... cpp_fused_addmm_0 = async_compile.cpp_pybinding(['const at::BFloat16*', 'const at::BFloat16*', 'const at::BFloat16*', 'at::BFloat16*'], ''' ... extern "C" void kernel(const at::BFloat16* X, const at::BFloat16* W, const at::BFloat16* inp, at::BFloat16* Y) { constexpr int64_t num_threads = 32; constexpr int64_t N = 1024; constexpr int64_t K = 2048; constexpr int64_t Mr = 32; constexpr int64_t Nr = 32; constexpr int64_t Kr = 32; ... cpp_fused_addmm_1 = async_compile.cpp_pybinding(['const at::BFloat16*', 'const at::BFloat16*', 'const at::BFloat16*', 'at::BFloat16*'], ''' ... extern "C" void kernel(const at::BFloat16* X, const at::BFloat16* W, const at::BFloat16* inp, at::BFloat16* Y) { constexpr int64_t num_threads = 32; constexpr int64_t N = 2048; constexpr int64_t K = 1024; constexpr int64_t Mr = 32; constexpr int64_t Nr = 32; constexpr int64_t Kr = 32; ... cpp_fused_addmm_2 = async_compile.cpp_pybinding(['const at::BFloat16*', 'const at::BFloat16*', 'const at::BFloat16*', 'at::BFloat16*'], ''' extern "C" void kernel(const at::BFloat16* X, const at::BFloat16* W, const at::BFloat16* inp, at::BFloat16* Y) { constexpr int64_t num_threads = 32; constexpr int64_t N = 1024; constexpr int64_t K = 2048; constexpr int64_t Mr = 32; constexpr int64_t Nr = 32; constexpr int64_t Kr = 32; ... def call(self, args): arg6_1, = args args.clear() buf0 = empty_strided_cpu((1024, 1024), (1024, 1), torch.bfloat16) cpp_fused_addmm_0(arg6_1, constant6, _frozen_param6, buf0) del arg6_1 buf1 = empty_strided_cpu((1024, 2048), (2048, 1), torch.bfloat16) cpp_fused_addmm_1(buf0, constant6_0, _frozen_param8, buf1) buf2 = buf0; del buf0 # reuse cpp_fused_addmm_2(buf1, constant6_1, _frozen_param10, buf2) return (buf2, ) ``` After reusing the pre-existing kernel, Inductor CPP will reuse cpp_fused_addmm_0. ``` cpp_fused_addmm_0 = async_compile.cpp_pybinding(['const at::BFloat16*', 'const at::BFloat16*', 'const at::BFloat16*', 'at::BFloat16*'], ''' ... extern "C" void kernel(const at::BFloat16* X, const at::BFloat16* W, const at::BFloat16* inp, at::BFloat16* Y) { constexpr int64_t num_threads = 32; constexpr int64_t N = 1024; constexpr int64_t K = 2048; constexpr int64_t Mr = 32; constexpr int64_t Nr = 32; constexpr int64_t Kr = 32; ... cpp_fused_addmm_1 = async_compile.cpp_pybinding(['const at::BFloat16*', 'const at::BFloat16*', 'const at::BFloat16*', 'at::BFloat16*'], ''' ... extern "C" void kernel(const at::BFloat16* X, const at::BFloat16* W, const at::BFloat16* inp, at::BFloat16* Y) { constexpr int64_t num_threads = 32; constexpr int64_t N = 2048; constexpr int64_t K = 1024; constexpr int64_t Mr = 32; constexpr int64_t Nr = 32; constexpr int64_t Kr = 32; ... 
def call(self, args): arg6_1, = args args.clear() buf0 = empty_strided_cpu((1024, 1024), (1024, 1), torch.bfloat16) cpp_fused_addmm_0(arg6_1, constant6, _frozen_param6, buf0) del arg6_1 buf1 = empty_strided_cpu((1024, 2048), (2048, 1), torch.bfloat16) cpp_fused_addmm_1(buf0, constant6_0, _frozen_param8, buf1) buf2 = buf0; del buf0 # reuse cpp_fused_addmm_0(buf1, constant6_1, _frozen_param10, buf2) return (buf2, ) ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/158404 Approved by: https://github.com/jansel, https://github.com/leslie-fang-intel --- test/inductor/test_cpu_select_algorithm.py | 31 ++++++++ torch/_inductor/codegen/cpp.py | 78 ++++++++++--------- .../codegen/cpu_device_op_overrides.py | 3 + 3 files changed, 76 insertions(+), 36 deletions(-) diff --git a/test/inductor/test_cpu_select_algorithm.py b/test/inductor/test_cpu_select_algorithm.py index fe1e59bd7f49a..ad27dd3190f8b 100644 --- a/test/inductor/test_cpu_select_algorithm.py +++ b/test/inductor/test_cpu_select_algorithm.py @@ -2910,6 +2910,37 @@ def forward(self, u, v): with verify(u.dtype) as (atol, rtol): self.common(mod, (u, v)) + @unittest.skipIf( + not torch._C._cpu._is_amx_tile_supported(), "AMX ISA support is required" + ) + @inductor_config.patch({"freezing": True}) + @patches + @torch.no_grad + @parametrize("batch_size", (1024,)) + @parametrize("in_features", (1024,)) + @parametrize("out_features", (2048,)) + @dtypes(torch.bfloat16) + def test_linear_reuse_kernels(self, batch_size, in_features, out_features, dtype): + class M(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear_x = torch.nn.Linear(in_features, out_features) + self.linear_y = torch.nn.Linear(out_features, in_features) + self.linear_z = torch.nn.Linear(in_features, out_features) + + def forward(self, x): + out = self.linear_x(x) + out = self.linear_y(out) + out = self.linear_z(out) + return out + + x = torch.randn(batch_size, in_features).to(dtype=dtype) + mod = M().to(dtype=dtype).eval() + self.common(mod, (x)) + _, code = run_and_get_cpp_code(mod, x) + # Check that only 2 kernels are in the generated code + assert code.count("AMXState amx_state") == 2 + @dynamo_config.patch({"dynamic_shapes": True, "assume_static_by_default": False}) class _DynamicShapesTestBase(BaseTestSelectAlgorithm): diff --git a/torch/_inductor/codegen/cpp.py b/torch/_inductor/codegen/cpp.py index 9d36e24d5f9e5..b339ee75262d2 100644 --- a/torch/_inductor/codegen/cpp.py +++ b/torch/_inductor/codegen/cpp.py @@ -5390,42 +5390,48 @@ def codegen_sync(self): def define_kernel(self, src_code, nodes, kernel_args=None): wrapper = V.graph.wrapper_code - fused_name = ( - get_fused_kernel_name(nodes, config.cpp.descriptive_names) - if config.cpp.descriptive_names - else "" - ) - kernel_name = "_".join(["cpp", fused_name, wrapper.next_kernel_suffix()]) - kernel_decl_name = kernel_name if V.graph.cpp_wrapper else "kernel" - src_code = src_code.replace(str(Placeholder.KERNEL_NAME), kernel_decl_name) - src_code = src_code.replace(str(Placeholder.DESCRIPTIVE_NAME), kernel_name) - # TODO(voz): Ostensibly, we should not need this. But there are cases where C++ codegen does - # not use BracesBuffer, so we have no good indicator of a C++ buffer atm. - src_code = src_code.replace("#pragma CMT", "//") - - # Get the lines in the source code representing the function definition, - # excluding the the first line including cpp_prefix.h. 
- first_char = src_code.rfind('extern "C"') - last_char = src_code.find(")", first_char) - if _IS_WINDOWS: - # get_export_declaration introduced one more ')' in Windows - last_char = src_code.find(")", last_char + 1) - kernel_definition = f"{src_code[first_char : last_char + 1]};\n" - - compile_wrapper = IndentedBuffer() - args = self.kernel_group.args if kernel_args is None else kernel_args - _, _, arg_types = args.cpp_argdefs() - if not V.graph.cpp_wrapper: - compile_wrapper.writeline(f"async_compile.cpp_pybinding({arg_types!r}, '''") - compile_wrapper.splice(src_code, strip=True) - if not V.graph.cpp_wrapper: - compile_wrapper.writeline("''')") - wrapper.define_kernel( - kernel_name, - compile_wrapper.getvalue(), - gpu=False, - cpp_definition=kernel_definition, - ) + if src_code in wrapper.src_to_kernel: + kernel_name = wrapper.src_to_kernel[src_code] + else: + fused_name = ( + get_fused_kernel_name(nodes, config.cpp.descriptive_names) + if config.cpp.descriptive_names + else "" + ) + kernel_name = "_".join(["cpp", fused_name, wrapper.next_kernel_suffix()]) + wrapper.src_to_kernel[src_code] = kernel_name + kernel_decl_name = kernel_name if V.graph.cpp_wrapper else "kernel" + src_code = src_code.replace(str(Placeholder.KERNEL_NAME), kernel_decl_name) + src_code = src_code.replace(str(Placeholder.DESCRIPTIVE_NAME), kernel_name) + # TODO(voz): Ostensibly, we should not need this. But there are cases where C++ codegen does + # not use BracesBuffer, so we have no good indicator of a C++ buffer atm. + src_code = src_code.replace("#pragma CMT", "//") + + # Get the lines in the source code representing the function definition, + # excluding the the first line including cpp_prefix.h. + first_char = src_code.rfind('extern "C"') + last_char = src_code.find(")", first_char) + if _IS_WINDOWS: + # get_export_declaration introduced one more ')' in Windows + last_char = src_code.find(")", last_char + 1) + kernel_definition = f"{src_code[first_char : last_char + 1]};\n" + + compile_wrapper = IndentedBuffer() + args = self.kernel_group.args if kernel_args is None else kernel_args + _, _, arg_types = args.cpp_argdefs() + if not V.graph.cpp_wrapper: + compile_wrapper.writeline( + f"async_compile.cpp_pybinding({arg_types!r}, '''" + ) + compile_wrapper.splice(src_code, strip=True) + if not V.graph.cpp_wrapper: + compile_wrapper.writeline("''')") + wrapper.define_kernel( + kernel_name, + compile_wrapper.getvalue(), + gpu=False, + cpp_definition=kernel_definition, + ) return kernel_name def flush(self): diff --git a/torch/_inductor/codegen/cpu_device_op_overrides.py b/torch/_inductor/codegen/cpu_device_op_overrides.py index 1ffafa74dd687..ccada837abbd4 100644 --- a/torch/_inductor/codegen/cpu_device_op_overrides.py +++ b/torch/_inductor/codegen/cpu_device_op_overrides.py @@ -14,6 +14,9 @@ def get_raw_stream(_): """ ) + def cpp_kernel_type(self) -> str: + return "void*" + def set_device(self, device_idx: int) -> str: return "pass" From c7fa16a05c944bf05c59e7ff3f603eaa89036384 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Tue, 16 Sep 2025 02:14:38 +0000 Subject: [PATCH 276/693] [ROCm][CI] update _rocm-test.yml based on _linux-test.yml (#163014) Fixes missing huggingface secrets and aligns _rocm-test.yml with other updates from _linux-test.yml that it was initially based on. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/163014 Approved by: https://github.com/huydhn --- .github/workflows/_rocm-test.yml | 33 ++++++++++++++----- ...dynamic_aot_eager_torchbench_inference.csv | 2 +- .../dynamo_eager_torchbench_inference.csv | 2 +- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/.github/workflows/_rocm-test.yml b/.github/workflows/_rocm-test.yml index f73972942b5f9..7781e1f65fd16 100644 --- a/.github/workflows/_rocm-test.yml +++ b/.github/workflows/_rocm-test.yml @@ -62,6 +62,11 @@ on: required: false type: number default: 1 + secrets: + HUGGING_FACE_HUB_TOKEN: + required: false + description: | + HF Auth token to avoid rate limits when downloading models or datasets from hub env: GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} @@ -76,10 +81,9 @@ jobs: strategy: matrix: ${{ fromJSON(inputs.test-matrix) }} fail-fast: false - timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }} runs-on: ${{ matrix.runner }} + timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }} steps: - # [see note: pytorch repo ref] - name: Checkout PyTorch uses: pytorch/pytorch/.github/actions/checkout-pytorch@main with: @@ -131,6 +135,9 @@ jobs: - name: Start monitoring script id: monitor-script + if: ${{ !inputs.disable-monitor }} + shell: bash + continue-on-error: true env: JOB_ID: ${{ steps.get-job-id.outputs.job-id }} JOB_NAME: ${{ steps.get-job-id.outputs.job-name }} @@ -138,9 +145,6 @@ jobs: WORKFLOW_RUN_ID: ${{github.run_id}} MONITOR_LOG_INTERVAL: ${{ inputs.monitor-log-interval }} MONITOR_DATA_COLLECT_INTERVAL: ${{ inputs.monitor-data-collect-interval }} - if: ${{ !inputs.disable-monitor }} - shell: bash - continue-on-error: true run: | python3 -m pip install psutil==5.9.8 dataclasses_json==0.6.7 python3 -m tools.stats.monitor --log-interval "$MONITOR_LOG_INTERVAL" --data-collect-interval "$MONITOR_DATA_COLLECT_INTERVAL" > usage_log.txt 2>&1 & @@ -178,6 +182,12 @@ jobs: run: | echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}" + - name: Preserve github env variables for use in docker + shell: bash + run: | + env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}" + env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}" + - name: Test id: test env: @@ -193,20 +203,22 @@ jobs: JOB_NAME: ${{ steps.get-job-id.outputs.job-name }} BRANCH: ${{ steps.parse-ref.outputs.branch }} SHA1: ${{ github.event.pull_request.head.sha || github.sha }} + BASE_SHA: ${{ github.event.pull_request.base.sha || github.sha }} + TEST_CONFIG: ${{ matrix.config }} + SHARD_NUMBER: ${{ matrix.shard }} + NUM_TEST_SHARDS: ${{ matrix.num_shards }} + REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }} CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }} VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }} TEST_SHOWLOCALS: ${{ steps.keep-going.outputs.ci-test-showlocals }} NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }} NO_TD: ${{ steps.keep-going.outputs.ci-no-td }} - TEST_CONFIG: ${{ matrix.config }} - SHARD_NUMBER: ${{ matrix.shard }} - NUM_TEST_SHARDS: ${{ matrix.num_shards }} - REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }} DOCKER_IMAGE: ${{ inputs.docker-image }} PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }} PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }} TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }} DASHBOARD_TAG: ${{ 
inputs.dashboard-tag }} + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }} run: | set -x @@ -236,6 +248,7 @@ jobs: -e GITHUB_RUN_ATTEMPT \ -e JOB_ID \ -e JOB_NAME \ + -e BASE_SHA \ -e BRANCH \ -e SHA1 \ -e AWS_DEFAULT_REGION \ @@ -253,10 +266,12 @@ jobs: -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \ -e PYTORCH_TEST_RERUN_DISABLED_TESTS \ -e TESTS_TO_INCLUDE \ + -e HUGGING_FACE_HUB_TOKEN \ -e DASHBOARD_TAG \ --env-file="${RUNNER_TEMP}/github_env_${GITHUB_RUN_ID}" \ --ulimit stack=10485760:83886080 \ --ulimit core=0 \ + --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ --security-opt seccomp=unconfined \ --cap-add=SYS_PTRACE \ --shm-size="8g" \ diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv index e019365ccbfdb..7797f2dcdf9d5 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv @@ -205,7 +205,7 @@ llama,pass,0 -llama_v2_7b_16h,model_fail_to_load,0 +llama_v2_7b_16h,pass_due_to_skip,0 diff --git a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv index bf70642a855ef..9199f0cf6c371 100644 --- a/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv +++ b/benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv @@ -205,7 +205,7 @@ llama,pass,0 -llama_v2_7b_16h,model_fail_to_load,0 +llama_v2_7b_16h,pass_due_to_skip,0 From 3ae31782cc3df812257a3d6cdda4a1f033ad7c23 Mon Sep 17 00:00:00 2001 From: Kevin Tang Date: Tue, 16 Sep 2025 02:32:50 +0000 Subject: [PATCH 277/693] [DCP] Add timeout for checkpoint background process join (#162828) Summary: Cleaning up checkpoint background process can currently block trainer thread indefinitely if the process is hanging (notably due to Gloo pg init timeout). This diff adds a 5s grace period for normal termination and sends SIGTERM if unable to shut down in that period. Rollback Plan: Differential Revision: D82268979 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162828 Approved by: https://github.com/meetv18 --- .../checkpoint/_async_process_executor.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/torch/distributed/checkpoint/_async_process_executor.py b/torch/distributed/checkpoint/_async_process_executor.py index 0614f65837f7d..d9c6de79b32b7 100644 --- a/torch/distributed/checkpoint/_async_process_executor.py +++ b/torch/distributed/checkpoint/_async_process_executor.py @@ -114,9 +114,16 @@ def __init__( def __del__(self) -> None: if self._save_process.is_alive(): - logger.info("Terminating the checkpoint background process...") - self._send(_CheckpointSaveProcessControlOpts.TERMINATE) - self._save_process.join() + try: + logger.info("Terminating the checkpoint background process.") + self._send(_CheckpointSaveProcessControlOpts.TERMINATE) + self._save_process.join(timeout=5) + finally: + if self._save_process.is_alive(): + logger.warning( + "Checkpoint background process is still alive after termination request. Sending SIGTERM." 
+ ) + self._save_process.terminate() def _send(self, data: Any) -> None: self._process_pipe.send(data) From 7924b083c1a839dbbdba0c28713f7280de12f181 Mon Sep 17 00:00:00 2001 From: Ke Wen Date: Mon, 15 Sep 2025 17:06:57 -0700 Subject: [PATCH 278/693] [CI] disable rerun of distributed tests (#163025) #162978 identified an issue that distributed test failures were wrongly muted. Per discussion with @malfet, one solution is to disable rerun of distributed tests in `run_test.py`. The PR makes use of the `is_distributed_test` flag to identify those tests. Pull Request resolved: https://github.com/pytorch/pytorch/pull/163025 Approved by: https://github.com/malfet --- test/run_test.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/run_test.py b/test/run_test.py index 9414a340257c0..ab0a56d301fae 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -1192,12 +1192,14 @@ def handle_log_file( def get_pytest_args(options, is_cpp_test=False, is_distributed_test=False): - if RERUN_DISABLED_TESTS: - # Distributed tests are too slow, so running them x50 will cause the jobs to timeout after + if is_distributed_test: + # Distributed tests do not support rerun, see https://github.com/pytorch/pytorch/issues/162978 + rerun_options = ["-x", "--reruns=0"] + elif RERUN_DISABLED_TESTS: + # ASAN tests are too slow, so running them x50 will cause the jobs to timeout after # 3+ hours. So, let's opt for less number of reruns. We need at least 150 instances of the - # test every 2 weeks to satisfy the SQL query (15 x 14 = 210). The same logic applies - # to ASAN, which is also slow - count = 15 if is_distributed_test or TEST_WITH_ASAN else 50 + # test every 2 weeks to satisfy the SQL query (15 x 14 = 210). + count = 15 if TEST_WITH_ASAN else 50 # When under rerun-disabled-tests mode, run the same tests multiple times to determine their # flakiness status. Default to 50 re-runs rerun_options = ["--flake-finder", f"--flake-runs={count}"] From fdf68fa5d70abebee1c5090a51ea30c7aa40b9b0 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 16 Sep 2025 03:30:02 +0000 Subject: [PATCH 279/693] [ONNX] Fix rotary_embedding_23 implementation (#162865) The implementation of rotary_embedding_23 when input is 3D was incorrect. 
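For the 3D path, the op is expected to split `hidden_size` into `num_heads` heads, rotate each head, and fold the result back into the original 3D layout. Below is a minimal sketch of that intended behavior for the non-interleaved case without `position_ids`; the helper name and shapes are illustrative only and are not the code added to `_impl.py` by this patch:

```py
import torch

def _rotary_3d_sketch(x, cos_cache, sin_cache, num_heads):
    # Sketch only: x is (batch, seq, hidden); caches are (batch, seq, rotary_dim // 2)
    batch, seq, hidden = x.shape
    head_size = hidden // num_heads
    # Split hidden into heads and move the head dim in front of seq:
    # (batch, num_heads, seq, head_size)
    x4 = x.view(batch, seq, num_heads, head_size).transpose(1, 2)
    rotary_dim = cos_cache.shape[-1] * 2
    x_rot, x_pass = x4[..., :rotary_dim], x4[..., rotary_dim:]
    # Non-interleaved convention: rotate the first and second halves together
    x1, x2 = x_rot[..., : rotary_dim // 2], x_rot[..., rotary_dim // 2 :]
    cos = cos_cache.unsqueeze(1)  # broadcast the caches over the head dim
    sin = sin_cache.unsqueeze(1)
    rotated = torch.cat((x1 * cos - x2 * sin, x1 * sin + x2 * cos), dim=-1)
    # Restore the original 3D (batch, seq, hidden) layout
    return torch.cat((rotated, x_pass), dim=-1).transpose(1, 2).reshape(batch, seq, hidden)

x = torch.rand(2, 3, 8)    # hidden_size 8, num_heads 2 -> head_size 4
cos = torch.rand(2, 3, 2)  # rotary_dim // 2 == 2
sin = torch.rand(2, 3, 2)
out = _rotary_3d_sketch(x, cos, sin, num_heads=2)
assert out.shape == x.shape
```

The new `test_rotary_embedding_3d` case below exercises exactly this 3D layout.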
## Tested Locally with ```py import onnx_ir as ir import onnx import torch import os import numpy as np base_path = "/home/justinchu/dev/onnx/onnx/backend/test/data/node" test_names = [ "test_rotary_embedding", "test_rotary_embedding_3d_input", "test_rotary_embedding_interleaved", "test_rotary_embedding_no_position_ids", "test_rotary_embedding_no_position_ids_interleaved", "test_rotary_embedding_no_position_ids_rotary_dim", "test_rotary_embedding_with_interleaved_rotary_dim", "test_rotary_embedding_with_rotary_dim", ] model_paths = [os.path.join(base_path, name) for name in test_names] for path in model_paths: print(f"Checking {path} for issues...") model = onnx.load(os.path.join(path, "model.onnx")) input0 = ir.from_proto( onnx.load_tensor(os.path.join(path, "test_data_set_0", "input_0.pb")) ).numpy() input1 = ir.from_proto( onnx.load_tensor(os.path.join(path, "test_data_set_0", "input_1.pb")) ).numpy() input2 = ir.from_proto( onnx.load_tensor(os.path.join(path, "test_data_set_0", "input_2.pb")) ).numpy() if os.path.exists(os.path.join(path, "test_data_set_0", "input_3.pb")): input3 = ir.from_proto( onnx.load_tensor(os.path.join(path, "test_data_set_0", "input_3.pb")) ).numpy() else: input3 = None output0 = ir.from_proto( onnx.load_tensor(os.path.join(path, "test_data_set_0", "output_0.pb")) ).numpy() m = ir.from_proto(model) node = m.graph[-1] print(node) assert node.op_type == "RotaryEmbedding" interleaved = node.attributes.get_int("interleaved", 0) num_heads = node.attributes.get_int("num_heads", 0) rotary_embedding_dim = node.attributes.get_int("rotary_embedding_dim", 0) torch_out = torch.onnx.ops.rotary_embedding( torch.tensor(input0), torch.tensor(input1), torch.tensor(input2), position_ids=torch.tensor(input3) if input3 is not None else None, interleaved=bool(interleaved), num_heads=num_heads, rotary_embedding_dim=rotary_embedding_dim, ) torch_out = torch_out.detach().cpu().numpy() np.testing.assert_allclose(torch_out, output0) ``` Fix https://github.com/pytorch/pytorch/issues/162848 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162865 Approved by: https://github.com/kunal-vaishnavi, https://github.com/titaiwangms --- test/onnx/ops/test_ops.py | 56 ++++++++++++++++++++++++-- torch/onnx/ops/_impl.py | 82 +++++++++++++++++++++++++++++++-------- 2 files changed, 119 insertions(+), 19 deletions(-) diff --git a/test/onnx/ops/test_ops.py b/test/onnx/ops/test_ops.py index 437c74e9bfbfd..3736b930900f1 100644 --- a/test/onnx/ops/test_ops.py +++ b/test/onnx/ops/test_ops.py @@ -4,13 +4,20 @@ from __future__ import annotations import onnx_ir.passes.common as common_passes +import onnxruntime from onnxscript import ir +from packaging import version import torch +from torch.onnx._internal.exporter import _testing as onnx_testing from torch.onnx.ops import _impl, _symbolic_impl from torch.testing._internal import common_utils +def has_onnxruntime_opset_23() -> bool: + return version.parse(onnxruntime.__version__) >= version.parse("1.23") + + class SchemaTest(common_utils.TestCase): def test_symbolic_has_correct_schema(self): torch.library.opcheck( @@ -432,7 +439,7 @@ def export(self, model, args=(), kwargs=None, **options) -> torch.onnx.ONNXProgr def test_onnx_ops_can_be_decomposed_to_aten(self): input_data = torch.rand(2, 3, 4, 8) - position_ids_data = torch.randint(0, 50, (2, 3)).long() + position_ids_data = torch.randint(0, 50, (2, 4)).long() sin_cache_data = torch.rand(50, 4) cos_cache_data = torch.rand(50, 4) @@ -473,7 +480,7 @@ def forward( def 
test_rotary_embedding_opcheck(self): input_data = torch.rand(2, 3, 4, 8) - position_ids_data = torch.randint(0, 50, (2, 3)).long() + position_ids_data = torch.randint(0, 50, (2, 4)).long() sin_cache_data = torch.rand(50, 4) cos_cache_data = torch.rand(50, 4) @@ -484,7 +491,7 @@ def test_rotary_embedding_opcheck(self): def test_rotary_embedding(self): input_data = torch.rand(2, 3, 4, 8) - position_ids_data = torch.randint(0, 50, (2, 3)).long() + position_ids_data = torch.randint(0, 50, (2, 4)).long() sin_cache_data = torch.rand(50, 4) cos_cache_data = torch.rand(50, 4) @@ -525,6 +532,49 @@ def forward( ) self.assertEqual(onnx_program.model.opset_imports[""], 23) self.assertEqual("RotaryEmbedding", onnx_program.model.graph.node(0).op_type) + if has_onnxruntime_opset_23(): + onnx_testing.assert_onnx_program(onnx_program) + else: + # Test with reference evaluator because ORT does not support the op as of version 1.22 + onnx_testing.assert_onnx_program(onnx_program, backend="reference") + + def test_rotary_embedding_3d(self): + num_heads = 2 + input_data = torch.rand(2, 3, 8) + sin_cache_data = torch.rand(2, 3, 2) + cos_cache_data = torch.rand(2, 3, 2) + + class Model(torch.nn.Module): + def forward(self, input_data, cos_cache_data, sin_cache_data): + return torch.onnx.ops.rotary_embedding( + input_data, + cos_cache_data, + sin_cache_data, + num_heads=num_heads, + ) + + model = Model() + + # Dynamic shapes are supported + dynamic_shapes = { + "input_data": {0: torch.export.Dim.DYNAMIC}, + "cos_cache_data": {0: torch.export.Dim.DYNAMIC}, + "sin_cache_data": {0: torch.export.Dim.DYNAMIC}, + } + + onnx_program = self.export( + model, + (input_data, cos_cache_data, sin_cache_data), + dynamic_shapes=dynamic_shapes, + opset_version=23, + ) + self.assertEqual(onnx_program.model.opset_imports[""], 23) + self.assertEqual("RotaryEmbedding", onnx_program.model.graph.node(0).op_type) + if has_onnxruntime_opset_23(): + onnx_testing.assert_onnx_program(onnx_program) + else: + # Test with reference evaluator because ORT does not support the op as of version 1.22 + onnx_testing.assert_onnx_program(onnx_program, backend="reference") def test_attention_basic(self): """Test basic attention functionality.""" diff --git a/torch/onnx/ops/_impl.py b/torch/onnx/ops/_impl.py index 30ffa9caf56d2..a7eba334ecfc8 100644 --- a/torch/onnx/ops/_impl.py +++ b/torch/onnx/ops/_impl.py @@ -56,18 +56,55 @@ def rotary_embedding_23( rotary_embedding_dim: int = 0, ) -> torch.Tensor: """RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23""" + # x has shape (batch_size, num_heads, sequence_length, head_size) + # or (batch_size, sequence_length, hidden_size) + input_shape = x.shape + input_rank = len(input_shape) + batch_size = input_shape[0] + sequence_length = input_shape[-2] + + # Validate position_ids and caches match x + if position_ids is not None: + torch._check( + position_ids.dim() == 2, + lambda: f"position_ids must be 2D when provided. Received shape {position_ids.shape}", + ) + torch._check( + position_ids.shape[0] == batch_size, + lambda: f"position_ids first dim (batch) must match x.shape[0] ({batch_size}). Received {position_ids.shape[0]}", + ) + torch._check( + position_ids.shape[1] == sequence_length, + lambda: f"position_ids second dim (sequence) must match x.shape[-2] ({sequence_length}). Received {position_ids.shape[1]}", + ) + torch._check( + cos_cache.dim() == 2 and sin_cache.dim() == 2, + lambda: "cos_cache/sin_cache must be 2D when position_ids is provided. 
" + f"Received cos_cache shape {cos_cache.shape}, sin_cache shape {sin_cache.shape}", + ) + else: + torch._check( + cos_cache.dim() == 3 and sin_cache.dim() == 3, + lambda: "cos_cache/sin_cache must be 3D when position_ids is not provided. " + f"Received cos_cache shape {cos_cache.shape}, sin_cache shape {sin_cache.shape}", + ) + # First ensure x has shape [batch_size, num_heads, seq_len, head_size] - batch_size = x.shape[0] - sequence_length = x.shape[1] - if len(x.shape) == 3: - hidden_size = x.shape[2] + # So that the rotation logic can be shared with reshaped 3D inputs + if input_rank == 4: + # Reshape from (batch_size, num_heads, seq_len, head_size) + # to [batch_size, seq_len, num_heads, head_size] + x = torch.permute(x, (0, 2, 1, 3)) + elif input_rank == 3: torch._check( num_heads != 0, - lambda: f"num_heads must be provided for 3D inputs. Received input tensor with shape {x.shape}", + lambda: f"num_heads must be provided for 3D inputs. Received input tensor with shape {input_shape}", ) + hidden_size = input_shape[2] head_size = hidden_size // num_heads new_shape = [batch_size, sequence_length, num_heads, head_size] x = torch.reshape(x, new_shape) + torch._check(len(x.shape) == 4, lambda: "x should be a 4D tensor by now") head_size = x.shape[3] @@ -88,14 +125,25 @@ def rotary_embedding_23( position_ids ] # Shape: [batch_size, sequence_length, head_size/2] else: - cos = cos_cache - sin = sin_cache - cos = cos[ - :, :, :rotary_embedding_dim_half - ] # Shape: [batch_size, sequence_length, rotary_embedding_dim/2] - sin = sin[ - :, :, :rotary_embedding_dim_half - ] # Shape: [batch_size, sequence_length, rotary_embedding_dim/2] + cos = cos_cache # Shape: [batch_size, sequence_length, rotary_embedding_dim/2] + sin = sin_cache # Shape: [batch_size, sequence_length, rotary_embedding_dim/2] + + torch._check( + cos.shape[0] == batch_size and cos.shape[1] == sequence_length, + lambda: f"cos has shape {cos.shape} but expected (batch={batch_size}, seq={sequence_length}, ...)", + ) + torch._check( + sin.shape[0] == batch_size and sin.shape[1] == sequence_length, + lambda: f"sin has shape {sin.shape} but expected (batch={batch_size}, seq={sequence_length}, ...)", + ) + torch._check( + cos.shape[-1] == rotary_embedding_dim_half, + lambda: f"Last dimension of cos cache ({cos.shape[-1]}) should match rotary_embedding_dim/2 ({rotary_embedding_dim_half}).", + ) + torch._check( + sin.shape[-1] == rotary_embedding_dim_half, + lambda: f"Last dimension of sin cache ({sin.shape[-1]}) should match rotary_embedding_dim/2 ({rotary_embedding_dim_half}).", + ) cos = torch.unsqueeze( cos, 2 ) # Shape: [batch_size, sequence_length, 1, rotary_embedding_dim/2] @@ -125,9 +173,11 @@ def rotary_embedding_23( else: x_rotate = torch.cat((real, imag), dim=-1) output = torch.cat((x_rotate, x_not_rotate), dim=-1) - if len(x.shape) == 3: - output = torch.reshape(output, x.shape) - return output + if input_rank == 3: + return torch.reshape(output, input_shape) + + # Return the dimensions to the original order + return torch.permute(output, (0, 2, 1, 3)) def _get_scale_factor(scale: Optional[float], head_size: int) -> float: From d172d0231b09bac343fc11c8e62785cdf25989a9 Mon Sep 17 00:00:00 2001 From: Nicolas Macchioni Date: Tue, 16 Sep 2025 04:07:12 +0000 Subject: [PATCH 280/693] [pcache] Cache and AsyncCache implementations (#162777) Summary: Implemented caching abstractions: `Cache` and `AsyncCache`. `Cache` provides an abstraction for defining simple key -> value stores with get and put functionality. 
We propose using `Cache` for implementations with very low (microseconds) overhead, for example an in-memory cache. `AsyncCache` provides an abstraction for defining simple key -> value stores with asynchronous get and put functionality. We propose using `AsyncCache` for implementations with medium to high (> millisecond) overhead, for example an on-disk cache. We provide an initial extension of `Cache` in the form of `InMemoryCache`. `InMemoryCache` provides fast, in-memory caching that can be later used to memoize more expensive cache accesses. `InMemoryCache` also provides a custom constructor `InMemoryCache.from_env_var` that can be used to pre-populate the in-memory cache, which will be helpful for enabling determinism in the future. We also provides extensions of `AsyncCache`. `OnDiskCache` subclasses `AsyncCache` and serves as a generic on-disk caching implementation with atomic, write-once guarantees. `OnDiskCache` is semi-generic, allowing subclassing to alter the output directory. `InductorOnDiskCache` subclasses `OnDiskCache` to create an Inductor-specific on-disk cache that outputs to Inductor's default caching directory. Test Plan: `Cache` Tests: 1. Get -> Set -> Get - Checks that `get(key)` returns `None` when `key` is not cached, and that after calling `put(key, value)` subsequent `get(key)` calls return `value` 2. Set -> Set - Checks that with duplicated `set(key, value)` calls only the initial call is successful 3. From env var - Checks that constructing an `InMemoryCache` from an environment variable works. `AsyncCache` Tests: 1. Get -> Set -> Get - Same as `Cache` test, but checks both with synchronous and asynchronous execution 2. Set -> Set - Same as `Cache` test, but checks both with synchronous and asynchronous execution 3. Set -> Set Concurrent - Checks that of two concurrent `set(key, value)` operations, only one passes ``` cd ~/fbsource/fbcode && buck test mode/opt //caffe2/test/inductor:pcache ``` {F1981926248} Rollback Plan: Differential Revision: D82269762 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162777 Approved by: https://github.com/masnesral, https://github.com/aorenste --- test/inductor/test_pcache.py | 297 +++++++++++++++++++++++++++++++ torch/_inductor/pcache.py | 330 +++++++++++++++++++++++++++++++++++ 2 files changed, 627 insertions(+) create mode 100644 test/inductor/test_pcache.py create mode 100644 torch/_inductor/pcache.py diff --git a/test/inductor/test_pcache.py b/test/inductor/test_pcache.py new file mode 100644 index 0000000000000..1a260b3ceb912 --- /dev/null +++ b/test/inductor/test_pcache.py @@ -0,0 +1,297 @@ +# Owner(s): ["module: inductor"] +from __future__ import annotations + +from concurrent.futures import Future, ThreadPoolExecutor +from os import environ +from random import randint +from typing import TYPE_CHECKING +from typing_extensions import Self + +from torch._inductor import pcache +from torch._inductor.test_case import run_tests, TestCase +from torch.testing._internal.common_utils import ( + instantiate_parametrized_tests, + parametrize, +) + + +if TYPE_CHECKING: + from collections.abc import Generator + + +str_key_gen: Generator[str, None, None] = ( + f"dummy_key_{randint(0, 100000)}" for _ in iter(int, 1) +) +bytes_value_gen: Generator[bytes, None, None] = ( + f"dummy_value_{randint(0, 100000)}".encode() for _ in iter(int, 1) +) + + +Caches: list[type[pcache.Cache]] = [pcache.InMemoryCache] +AsyncCaches: list[type[pcache.AsyncCache]] = [pcache.InductorOnDiskCache] + + +@instantiate_parametrized_tests 
+class CacheTest(TestCase): + @parametrize("Cache", Caches) + def test_get_insert_get(self: Self, Cache: type[pcache.Cache]) -> None: + key: str = next(str_key_gen) + value: bytes = next(bytes_value_gen) + + cache: pcache.Cache = Cache() + + # make sure our key is fresh + while cache.get(key) is not None: + key = next(str_key_gen) + + # first get should return None, no hit + self.assertIsNone(cache.get(key)) + # insert should return True, having set key -> value + self.assertTrue(cache.insert(key, value)) + # second get should return value, hit + self.assertEqual(cache.get(key), value) + + @parametrize("Cache", Caches) + def test_insert_insert(self: Self, Cache: type[pcache.Cache]) -> None: + key: str = next(str_key_gen) + value: bytes = next(bytes_value_gen) + + cache: pcache.Cache = Cache() + + if cache.get(key) is None: + # if key isn't already cached, cache it + self.assertTrue(cache.insert(key, value)) + + # second insert should not update the value + self.assertFalse(cache.insert(key, value)) + + def test_in_memory_cache_from_env_var( + self: Self, Cache: type[pcache.InMemoryCache] = pcache.InMemoryCache + ) -> None: + key_1: str = next(str_key_gen) + value_1: bytes = next(bytes_value_gen) + + key_2: str = next(str_key_gen) + while key_2 == key_1: + key_2 = next(str_key_gen) + value_2: bytes = next(bytes_value_gen) + + key_3: str = next(str_key_gen) + while key_3 in (key_1, key_2): + key_3 = next(str_key_gen) + + env_var = "INMEMORYCACHE_TEST" + env_val = f"{key_1},{value_1!r};{key_2},{value_2!r}" + environ[env_var] = env_val + + cache = Cache.from_env_var(env_var) + + # key_1 -> value_1 is in env_val, so we should hit + self.assertEqual(cache.get(key_1), value_1) + # key_2 -> value_2 is in env_val, so we should hit + self.assertEqual(cache.get(key_2), value_2) + # key_3 -> value_3 is not in env_val, so we should miss + self.assertIsNone(cache.get(key_3)) + + def test_in_memory_cache_from_env_var_bad_kv_pair( + self: Self, Cache: type[pcache.InMemoryCache] = pcache.InMemoryCache + ) -> None: + key_1: str = next(str_key_gen) + value_1: bytes = next(bytes_value_gen) + + env_var = "INMEMORYCACHE_TEST" + # missing "," delimiter + env_val = f"{key_1}{value_1!r};" + kv_pair = env_val[:-1] + environ[env_var] = env_val + + with self.assertRaisesRegex( + ValueError, + f"Malformed kv_pair {kv_pair!r} in env_var {env_var!r}, missing comma separator!", + ): + _ = Cache.from_env_var(env_var) + + def test_in_memory_cache_from_env_var_bad_value( + self: Self, Cache: type[pcache.InMemoryCache] = pcache.InMemoryCache + ) -> None: + key_1: str = next(str_key_gen) + # exclude b' prefix and ' suffix + value_1: str = "bad_value" + + env_var = "INMEMORYCACHE_TEST" + env_val = f"{key_1},{value_1};" + kv_pair = env_val[:-1] + environ[env_var] = env_val + + with self.assertRaisesRegex( + ValueError, + f"Malformed value {value_1!r} in kv_pair {kv_pair!r}, expected b'...' format!", + ): + _ = Cache.from_env_var(env_var) + + # not encoded + value_2: str = f"b'{chr(256)}'" + + env_val = f"{key_1},{value_2};" + kv_pair = env_val[:-1] + environ[env_var] = env_val + + with self.assertRaisesRegex( + ValueError, f"Malformed value {value_2!r} in kv_pair {kv_pair!r}!" 
+ ): + _ = Cache.from_env_var(env_var) + + def test_in_memory_cache_from_env_var_one_key_many_values( + self: Self, Cache: type[pcache.InMemoryCache] = pcache.InMemoryCache + ) -> None: + key_1: str = next(str_key_gen) + value_1: bytes = next(bytes_value_gen) + value_2: bytes = next(bytes_value_gen) + + env_var = "INMEMORYCACHE_TEST" + env_val = f"{key_1},{value_1!r};{key_1},{value_2!r}" + environ[env_var] = env_val + + with self.assertRaisesRegex( + ValueError, + f"Duplicated values for key {key_1!r}, got {value_1!r} and {value_2!r}!", + ): + _ = Cache.from_env_var(env_var) + + +@instantiate_parametrized_tests +class AsyncCacheTest(TestCase): + @parametrize("AsyncCache", AsyncCaches) + @parametrize("Executor", [ThreadPoolExecutor, None]) + def test_get_insert_get( + self: Self, + AsyncCache: type[pcache.AsyncCache], + Executor: type[ThreadPoolExecutor] | None = None, + ) -> None: + key: str = next(str_key_gen) + value: bytes = next(bytes_value_gen) + + async_cache: pcache.AsyncCache = AsyncCache() + executor: ThreadPoolExecutor = Executor() if Executor is not None else None + + if executor is None: + # make sure our key is fresh + while async_cache.get(key) is not None: + key = next(str_key_gen) + + # first get should miss + self.assertIsNone(async_cache.get(key)) + # insert should set key -> value mapping + self.assertTrue(async_cache.insert(key, value)) + # second get should hit + self.assertEqual(async_cache.get(key), value) + else: + # make sure our key is fresh + while async_cache.get_async(key, executor).result() is not None: + key = next(str_key_gen) + + # first get should miss + self.assertIsNone(async_cache.get_async(key, executor).result()) + # insert should set key -> value mapping + self.assertTrue(async_cache.insert_async(key, value, executor).result()) + # second get should hit + self.assertEqual(async_cache.get_async(key, executor).result(), value) + executor.shutdown() + + @parametrize("AsyncCache", AsyncCaches) + @parametrize("Executor", [ThreadPoolExecutor, None]) + def test_insert_insert( + self: Self, + AsyncCache: type[pcache.AsyncCache], + Executor: type[ThreadPoolExecutor] | None = None, + ) -> None: + key: str = next(str_key_gen) + value: bytes = next(bytes_value_gen) + + async_cache: pcache.AsyncCache = AsyncCache() + executor: ThreadPoolExecutor = Executor() if Executor is not None else None + + if executor is None: + if async_cache.get(key) is None: + # set key -> value mapping if unset + self.assertTrue(async_cache.insert(key, value)) + # second insert should not override the prior insert + self.assertFalse(async_cache.insert(key, value)) + else: + if async_cache.get_async(key, executor).result() is None: + # set key -> value mapping if unset + self.assertTrue(async_cache.insert_async(key, value, executor).result()) + # second insert should not override the prior insert + self.assertFalse(async_cache.insert_async(key, value, executor).result()) + executor.shutdown() + + @parametrize("AsyncCache", AsyncCaches) + def test_concurrent_insert_insert( + self: Self, + AsyncCache: type[pcache.AsyncCache], + Executor: type[ThreadPoolExecutor] = ThreadPoolExecutor, + ) -> None: + key: str = next(str_key_gen) + value: bytes = next(bytes_value_gen) + + async_cache: pcache.AsyncCache = AsyncCache() + executor: ThreadPoolExecutor = Executor() + + # make sure our key is fresh + while async_cache.get_async(key, executor).result() is not None: + key = next(str_key_gen) + + insert_1: Future[bool] = async_cache.insert_async(key, value, executor) + insert_2: Future[bool] = 
async_cache.insert_async(key, value, executor) + + # only one insert should succeed + self.assertTrue(insert_1.result() ^ insert_2.result()) + executor.shutdown() + + @parametrize("AsyncCache", AsyncCaches) + def test_concurrent_get_insert( + self: Self, + AsyncCache: type[pcache.AsyncCache], + Executor: type[ThreadPoolExecutor] = ThreadPoolExecutor, + ) -> None: + key: str = next(str_key_gen) + value: bytes = next(bytes_value_gen) + + async_cache: pcache.AsyncCache = AsyncCache() + executor: ThreadPoolExecutor = Executor() + + # make sure our key is fresh + while async_cache.get_async(key, executor).result() is not None: + key = next(str_key_gen) + + # try get first + get_1: Future[bytes | None] = async_cache.get_async(key, executor) + insert_1: Future[bool] = async_cache.insert_async(key, value, executor) + + if get_1.result() is not None: + # if the get succeeded it should return the value stored by the insert + self.assertEqual(get_1.result(), value) + + # either way the insert should succeed as the key is fresh + self.assertTrue(insert_1.result()) + + # make sure our key is fresh + while async_cache.get_async(key, executor).result() is not None: + key = next(str_key_gen) + + # try insert first + insert_2: Future[bool] = async_cache.insert_async(key, value, executor) + get_2: Future[bytes | None] = async_cache.get_async(key, executor) + + if get_2.result() is not None: + # if the get succeeded it should return the value stored by the insert + self.assertEqual(get_2.result(), value) + + # either way the insert should succeed as the key is fresh + self.assertTrue(insert_2.result()) + + executor.shutdown() + + +if __name__ == "__main__": + run_tests() diff --git a/torch/_inductor/pcache.py b/torch/_inductor/pcache.py new file mode 100644 index 0000000000000..6112eb2f002cd --- /dev/null +++ b/torch/_inductor/pcache.py @@ -0,0 +1,330 @@ +from __future__ import annotations + +from functools import cached_property +from os import getenv +from pathlib import Path +from typing import Generic, TYPE_CHECKING, TypeVar +from typing_extensions import override, Self + +from torch.utils._filelock import FileLock + + +if TYPE_CHECKING: + from concurrent.futures import Future, ThreadPoolExecutor + + +Key = TypeVar("Key") +Value = TypeVar("Value") + + +class Cache(Generic[Key, Value]): + """ + Abstract base class for cache implementations. + + Provides the interface for basic synchronous get and insert methods for storing and retrieving data. + Subclasses must implement both methods. + + Note: + - Not guaranteed to be thread-safe. + - For asynchronous and thread-safe cache, see `AsyncCache`. + + Methods: + get(key): Retrieve a value by key. + insert(key, value): Insert a value if the key does not already exist. + """ + + def get(self: Self, key: Key) -> Value | None: + """ + Retrieve the value associated with the given key from the cache. + + Args: + key (Key): The key used to query the cache. + + Returns: + Value | None: The value associated with the key, or None if not found. + + Raises: + NotImplementedError: If not implemented by a subclass. + """ + raise NotImplementedError + + def insert(self: Self, key: Key, value: Value) -> bool: + """ + Store the given value in the cache with the associated key if the key does + not already exist in the cache, otherwise do nothing. + + Args: + key (Key): The key to associate with the value. + value (Value): The value to be stored in the cache. + + Returns: + bool: True if the value was stored successfully, False if the key already exists. 
+ + Raises: + NotImplementedError: If not implemented by a subclass. + """ + raise NotImplementedError + + +class InMemoryCache(Cache[str, bytes]): + """ + In-memory cache implementation. + + Stores cache data in a dictionary for fast lookups and insertions. + Not thread-safe. + """ + + def __init__(self: Self) -> None: + """ + Initialize the in-memory cache. + """ + self._cache: dict[str, bytes] = {} + + @override + def get(self: Self, key: str) -> bytes | None: + """ + Retrieve the value associated with the given key from the cache. + + Args: + key (str): The key used to query the cache. + + Returns: + bytes | None: The value associated with the key, or None if not found. + """ + return self._cache.get(key) + + @override + def insert(self: Self, key: str, value: bytes) -> bool: + """ + Store the given value in the cache with the associated key if the key does + not already exist in the cache, otherwise do nothing. + + Args: + key (str): The key to associate with the value. + value (bytes): The value to be stored in the cache. + + Returns: + bool: True if the value was stored successfully, False if the key already exists. + """ + if key in self._cache: + return False + self._cache[key] = value + return True + + @classmethod + def from_env_var(cls, env_var: str) -> Self: + """ + Create a new in-memory cache instance from an environment variable. + + The environment variable should contain key-value pairs separated by ';', + with each pair formatted as 'key,value'. The value should be a string + representation of bytes (e.g., b'...'). + + Args: + env_var (str): The environment variable containing cache data. + + Returns: + InMemoryCache: A new in-memory cache instance populated with data from the environment variable. + + Raises: + ValueError: If a key is associated with two distinct values, or if the environment variable + is malformed (e.g., missing comma, value not a bytes string). + """ + cache: Self = cls() + env_val: str | None = getenv(env_var, None) + + if env_val is not None: + for kv_pair in env_val.split(";"): + if not kv_pair: + # can happen if env_val is an empty string, or ends with ; + continue + try: + key, raw_value = kv_pair.split(",", 1) + except ValueError as err: + raise ValueError( + f"Malformed kv_pair {kv_pair!r} in env_var {env_var!r}, missing comma separator!" + ) from err + # check that raw_value is a str repr of bytes + if (not raw_value.startswith("b'")) or (not raw_value.endswith("'")): + raise ValueError( + f"Malformed value {raw_value!r} in kv_pair {kv_pair!r}, expected b'...' format!" + ) + # remove b' prefix and ' suffix + str_value = raw_value[2:-1] + try: + # make sure the value is legitimately encoded + value = bytes([ord(char) for char in str_value]) + except ValueError as err: + raise ValueError( + f"Malformed value {raw_value!r} in kv_pair {kv_pair!r}!" + ) from err + # duplicates are ok, so long as the key does not point to two distinct values + if (not cache.insert(key, value)) and (cache.get(key) != value): + raise ValueError( + f"Duplicated values for key {key!r}, got {cache.get(key)!r} and {value!r}!" + ) + + return cache + + +class AsyncCache(Cache[Key, Value]): + """ + Abstract base class for asynchronous, thread-safe cache implementations. + + Provides synchronous get/insert methods and additional asynchronous (_async) methods + for concurrent access using a ThreadPoolExecutor. All methods are thread-safe. + + Note: + - Use this class or its subclasses when thread safety or async access is required. 
+ - The _async methods return concurrent.futures.Future objects. + + Methods: + get(key): Retrieve a value by key. + get_async(key, executor): Asynchronously retrieve a value by key. + insert(key, value): Insert a value. + insert_async(key, value, executor): Asynchronously insert a value. + """ + + def get_async( + self: Self, key: Key, executor: ThreadPoolExecutor + ) -> Future[Value | None]: + """ + Retrieve the value associated with the given key from the cache asynchronously. + + Args: + key (Key): The key used to query the cache. + executor (ThreadPoolExecutor): The executor to use for asynchronous execution. + + Returns: + Future[Value | None]: A Future representing the result of the asynchronous operation. + """ + return executor.submit(self.get, key) + + def insert_async( + self: Self, key: Key, value: Value, executor: ThreadPoolExecutor + ) -> Future[bool]: + """ + Store the given value in the cache with the associated key if the key does + not already exist in the cache, otherwise do nothing, asynchronously. + + Args: + key (Key): The key to associate with the value. + value (Value): The value to be stored in the cache. + executor (ThreadPoolExecutor): The executor to use for asynchronous execution. + + Returns: + Future[bool]: A Future representing the result of the asynchronous operation. + """ + return executor.submit(self.insert, key, value) + + +class OnDiskCache(AsyncCache[str, bytes]): + """ + Abstract base class for on-disk cache implementations. + + Provides synchronous and asynchronous get/insert methods for storing and retrieving data on disk. + All methods are thread-safe. + + Methods: + get(key): Retrieve a value by key from disk. + get_async(key, executor): Asynchronously retrieve a value by key from disk. + insert(key, value): Insert a value on disk. + insert_async(key, value, executor): Asynchronously insert a value on disk. + """ + + @property + def base_dir(self: Self) -> Path: + """ + Get the base directory for the on-disk cache. + + Returns: + Path: The base directory for the on-disk cache. + + Raises: + NotImplementedError: If not implemented by a subclass. + """ + raise NotImplementedError + + def _fpath_from_key(self: Self, key: str) -> Path: + """ + Get the file path associated with the given key. + + Args: + key (str): The key used to query the cache. + + Returns: + Path: The file path associated with the key. + """ + return self.base_dir / key + + def _flock_from_fpath(self: Self, fpath: Path) -> FileLock: + """ + Get the file lock associated with the given file path. + + Args: + fpath (Path): The file path to lock. + + Returns: + FileLock: The file lock associated with the file path. + """ + return FileLock(str(fpath) + ".lock") + + @override + def get(self: Self, key: str) -> bytes | None: + """ + Retrieve the value associated with the given key from the cache on disk. + + Args: + key (str): The key used to query the cache. + + Returns: + bytes | None: The value associated with the key, or None if not found. + """ + fpath = self._fpath_from_key(key) + flock = self._flock_from_fpath(fpath) + with flock: + return fpath.read_bytes() if fpath.is_file() else None + + @override + def insert(self: Self, key: str, value: bytes) -> bool: + """ + Store the given value in the cache with the associated key on disk. + + Args: + key (str): The key to associate with the value. + value (bytes): The value to be stored in the cache. + + Returns: + bool: True if the value was stored successfully, False if the key already exists. 
+ """ + fpath = self._fpath_from_key(key) + flock = self._flock_from_fpath(fpath) + fpath.parent.mkdir(parents=True, exist_ok=True) + try: + # "x" mode is exclusive creation, meaning the file will be created + # iff the file does not already exist (atomic w/o overwrite) + with flock as _, open(fpath, "xb") as fp: + fp.write(value) + except FileExistsError: + return False + return True + + +class InductorOnDiskCache(OnDiskCache): + """ + On-disk cache implementation for Inductor. + + Uses the default cache directory provided by Inductor. + """ + + @cached_property + def base_dir(self: Self) -> Path: + """ + Get the base directory for the on-disk cache. + + Returns: + Path: The base directory for the on-disk cache. + """ + from torch._inductor.runtime.runtime_utils import default_cache_dir + + return Path(default_cache_dir(), "pcache") From 6c0fd747af10eb0a6936a138cb9a4e04b039be22 Mon Sep 17 00:00:00 2001 From: PyTorch UpdateBot Date: Tue, 16 Sep 2025 04:25:01 +0000 Subject: [PATCH 281/693] [vllm hash update] update the pinned vllm hash (#162928) This PR is auto-generated nightly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/nightly.yml). Update the pinned vllm hash. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162928 Approved by: https://github.com/pytorchbot --- .github/ci_commit_pins/vllm.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ci_commit_pins/vllm.txt b/.github/ci_commit_pins/vllm.txt index f1a4e5deb8d92..3b9e88ac336eb 100644 --- a/.github/ci_commit_pins/vllm.txt +++ b/.github/ci_commit_pins/vllm.txt @@ -1 +1 @@ -973c9d01da863cac9c51e8a5c0d390fc84b84fbc +5bcc153d7bf69ef34bc5788a33f60f1792cf2861 From 2c4562881312d7cc3c9ad60c541ac091cd5f2136 Mon Sep 17 00:00:00 2001 From: Phillip Liu Date: Tue, 16 Sep 2025 04:46:05 +0000 Subject: [PATCH 282/693] [Flight Recorder][WP] Added mismatch tail as an arg (#162991) Summary: Mismatch tail is used as a fixed variable and there are cases that there are more than 10 mismatches FR gives up producing results (e.g. https://fburl.com/ai_infra/7gjl5ucb). This diff added the mismatch tail in the parsed args so make this configuarble. Also tho the variable name is `mismatch_tail`(last 10) it is used as `mismatch_head` (the first 10). Updated it to be `num_mismatch_to_print` Test Plan: `buck2 run @//mode/opt //caffe2/fb/flight_recorder:fr_trace -- --mast_job_id aps-ctx_fm_pipeline_change-1c8ea38a94 --mast_job_version 0 --mast_job_attempt 2 --bucket tlcm_log_blob --world_size 128 --dump_file_name_offset 0 --allow-incomplete-ranks --num_mismatch_to_print 20 1>out 2>err` Confirm no error and output 20 mismatches. 
Rollback Plan: Differential Revision: D82335995 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162991 Approved by: https://github.com/fduwjj --- tools/flight_recorder/components/builder.py | 6 +++--- tools/flight_recorder/components/config_manager.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/flight_recorder/components/builder.py b/tools/flight_recorder/components/builder.py index e0aaef31c1c32..b2917a557b4da 100644 --- a/tools/flight_recorder/components/builder.py +++ b/tools/flight_recorder/components/builder.py @@ -134,6 +134,7 @@ def build_collectives( _memberships: dict[str, set[Any]], _pg_guids: dict[tuple[str, int], str], version: str, + mismatch_cap: int = 10, ) -> tuple[list[Traceback], list[Collective], list[NCCLCall]]: """ groups, memberships are the non-flat dicts that are indexable @@ -171,7 +172,6 @@ def build_collectives( # once we find one mismatch, we stop pairing up collectives since the pairing is possibly incorrect # instead, just record the remaining ops as NCCLCalls mismatch = {_groups[g].id: 0 for g in _groups} - MISMATCH_TAIL = 10 # For best effort partial analysis. dumps_ranks = {int(key) for key in all_entries.keys()} @@ -365,7 +365,7 @@ def build_collectives( ) ) - if mismatch[pg_name] > MISMATCH_TAIL: + if mismatch[pg_name] > mismatch_cap: logger.error( "Too many mismatches for process_group %s: %s aborting", pg_name, desc ) @@ -412,7 +412,7 @@ def build_db( check_no_missing_dump_files(entries, memberships) tracebacks, collectives, nccl_calls = build_collectives( - entries, _groups, _memberships, _pg_guids, version + entries, _groups, _memberships, _pg_guids, version, args.mismatch_cap ) logger.debug("built collectives, nccl_calls") if args.verbose: diff --git a/tools/flight_recorder/components/config_manager.py b/tools/flight_recorder/components/config_manager.py index abd7f5372133c..1b4eafc3631d2 100644 --- a/tools/flight_recorder/components/config_manager.py +++ b/tools/flight_recorder/components/config_manager.py @@ -68,6 +68,12 @@ def __init__(self: "JobConfig"): self.parser.add_argument("-j", "--just_print_entries", action="store_true") self.parser.add_argument("-v", "--verbose", action="store_true") self.parser.add_argument("--print_stack_trace", action="store_true") + self.parser.add_argument( + "--mismatch_cap", + type=int, + default=10, + help="Maximum number of mismatches we print (from earliest).", + ) def parse_args( self: "JobConfig", args: Optional[Sequence[str]] From b68a5115a40307c98db64a062599965a5cdce2df Mon Sep 17 00:00:00 2001 From: Scott Rostrup Date: Tue, 16 Sep 2025 04:46:07 +0000 Subject: [PATCH 283/693] Workaround for mtia double init issue in has_triton (#162974) Summary: This change adds a new environment variable (`TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTION`) and configuration in `torch._inductor.config` which can be set to `"1"` to allow a user to disable triton's device detection logic in [torch/utils/_triton.py:has_triton()](https://github.com/pytorch/pytorch/blob/c9e57d7e9f326e427fc4ae5c318fd017cd4b75a9/torch/utils/_triton.py#L128). This function is used at import scope in several places but the function has a side effect of initializing the mtia device if it is available which is causing some of our autotuning workflows to crash. 
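A minimal opt-in sketch (assuming the flag lands exactly as in the diff below):

```py
import os

# The env var is read when torch._inductor.config is imported, so set it
# before anything pulls in torch/inductor.
os.environ["TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTION"] = "1"

from torch.utils._triton import has_triton  # noqa: E402

# With the flag set, has_triton() returns False without touching any device
# interface, so the mtia init side effect never runs.
print(has_triton())
```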
Worth noting that when enabled this configuration disables all device detection not just mtia and this is because the logic in has_triton will initialize the mtia device as a side effect even when checking for a cuda or other device via the [get_interface_for_device()](https://github.com/pytorch/pytorch/blob/c9e57d7e9f326e427fc4ae5c318fd017cd4b75a9/torch/_dynamo/device_interface.py#L570) function. I've tagged it `topic: not user facing` since I don't anticipate any outside of meta users making use of this, however this is my first PR here, so please indicate if it should be handled differently. Test Plan: This has been tested in the context of internal workflows. Differential Revision: D82347853 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162974 Approved by: https://github.com/xmfan --- torch/_inductor/config.py | 5 +++++ torch/utils/_triton.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index 13512b19a05fa..bea2ab0bbce98 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -467,6 +467,11 @@ def prologue_fusion_enabled() -> bool: == "1" ) +# Disable triton from trying to initialize and detect devices on the host +triton_disable_device_detection = ( + os.environ.get("TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTION", "0") == "1" +) + # enable inductor graph partition to allow multiple inductor graphs for the same dynamo graph graph_partition: bool = ( os.environ.get("TORCHINDUCTOR_GRAPH_PARTITION", "1" if not is_fbcode() else "0") diff --git a/torch/utils/_triton.py b/torch/utils/_triton.py index 9901fe58d1f21..5f0ca5b4eff8d 100644 --- a/torch/utils/_triton.py +++ b/torch/utils/_triton.py @@ -144,6 +144,11 @@ def has_triton() -> bool: if not has_triton_package(): return False + from torch._inductor.config import triton_disable_device_detection + + if triton_disable_device_detection: + return False + from torch._dynamo.device_interface import get_interface_for_device def cuda_extra_check(device_interface: Any) -> bool: From 9009c4da3917788a5cccd007787becf7f01bc4e2 Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Mon, 15 Sep 2025 12:00:45 -0700 Subject: [PATCH 284/693] [functional] Avoid duplicate custom get_device call in constructor (#162889) Trying to reduce the number of `__torch_dispatch__` calls of FakeTensorMode in the AOT metadata collection pass. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162889 Approved by: https://github.com/Lucaskabela, https://github.com/zou3519 --- aten/src/ATen/FunctionalStorageImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aten/src/ATen/FunctionalStorageImpl.cpp b/aten/src/ATen/FunctionalStorageImpl.cpp index a5512818343fb..2cf8d9727f658 100644 --- a/aten/src/ATen/FunctionalStorageImpl.cpp +++ b/aten/src/ATen/FunctionalStorageImpl.cpp @@ -102,7 +102,7 @@ FunctionalStorageImpl::FunctionalStorageImpl(const Tensor& base) // SparseTensorImpl has no storage, so we cannot query its nbytes. 
// (original_storage_size is only used for storage resizing in fsdp anyway, which does not apply to sparse) // Same for XLA - if (base.unsafeGetTensorImpl()->has_storage() && base.device().type() != c10::DeviceType::XLA) { + if (base.unsafeGetTensorImpl()->has_storage() && data_ptr().device().type() != c10::DeviceType::XLA) { original_storage_size_ = base.unsafeGetTensorImpl()->unsafe_storage().unsafeGetStorageImpl()->sym_nbytes(); } else { original_storage_size_ = -1; From 9786243b64f32a18eddefbaa51318576d8b16735 Mon Sep 17 00:00:00 2001 From: "Cui, Yifeng" Date: Tue, 16 Sep 2025 06:30:48 +0000 Subject: [PATCH 285/693] Update torch-xpu-ops commit pin (#162804) Update the torch-xpu-ops commit to [intel/torch-xpu-ops@d8c3ee](https://github.com/intel/torch-xpu-ops/commit/d8c3eefc297193cf9e0888a7d8ff32dc74da0793), includes: - Optimize adaptive average pool for channel-last memory format - Add unregister wait_tensor - Replace deprecated `[[intel::reqd_sub_group_size(SgSize)]]` with `[[sycl::reqd_sub_group_size(SIMD)]]` and remove unnecessary attributes - Revert "Roll back to original usage of sycl::get_kernel_bundle" Pull Request resolved: https://github.com/pytorch/pytorch/pull/162804 Approved by: https://github.com/EikanWang --- third_party/xpu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/xpu.txt b/third_party/xpu.txt index 74925f898e74b..ed84e6812d9b2 100644 --- a/third_party/xpu.txt +++ b/third_party/xpu.txt @@ -1 +1 @@ -83c5a5a5516d498dde2ae131ca2d10a4abb94cfb +d8c3eefc297193cf9e0888a7d8ff32dc74da0793 From 1115749da7e64c0f434f08f602d352ee8c49e2e9 Mon Sep 17 00:00:00 2001 From: Shangdi Yu Date: Tue, 16 Sep 2025 06:56:00 +0000 Subject: [PATCH 286/693] Fix provenance tracking kernel name for fallback kernels (#162628) Summary: as title `kernel.cpp_kernel_name` is something like `at::_ops::_scaled_dot_product_efficient_attention::call`, but the actual kernel name we want is `aoti_torch_cuda__scaled_dot_product_efficient_attention` Differential Revision: D82142287 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162628 Approved by: https://github.com/angelayi, https://github.com/desertfire --- torch/_inductor/codegen/cpp_wrapper_cpu.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch/_inductor/codegen/cpp_wrapper_cpu.py b/torch/_inductor/codegen/cpp_wrapper_cpu.py index d9be6cc71eb61..df162b806b73a 100644 --- a/torch/_inductor/codegen/cpp_wrapper_cpu.py +++ b/torch/_inductor/codegen/cpp_wrapper_cpu.py @@ -1364,9 +1364,10 @@ def generate_c_shim_fallback_kernel( debug_handle = None if config.trace.provenance_tracking_level != 0: + shim_fn = self.get_c_shim_func_name(fallback_kernel.cpp_kernel_name, device) # type: ignore[arg-type] debug_handle = set_kernel_post_grad_provenance_tracing( fallback_kernel, - fallback_kernel.cpp_kernel_name, # type: ignore[arg-type] + shim_fn, is_extern=True, ) self.generate_c_shim_extern_kernel_call( From d2ecddf1a35901c8a7df4947891c24a9db27a6d1 Mon Sep 17 00:00:00 2001 From: Kevin Fu Date: Tue, 16 Sep 2025 06:56:04 +0000 Subject: [PATCH 287/693] [PT2]: Overriding Tensor device by SubmodNameToDevice (#162144) Summary: A temporarily solution mainly for weights that are not moved to cuda in fake mode during publishing, but runs on cuda in serving. This has some overlap with placement, but with 2 differences: 1. OverrideWeightsDevice only changes weights, not graph. 2. 
Placement only handles mapping between non-empty cuda indices, while here we override everything as submodNameToDevice is the ground truth. Test Plan: ICE replayer with custom package: https://www.internalfb.com/intern/unidash/dashboard/ads_infra_cost_estimation/model_infra_cost_estimation/?e[select_ESTIMATION_RUN_ID]=ICE_kevinqfu_1756939411c164_replayeripnext_00 Rollback Plan: Differential Revision: D81284723 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162144 Approved by: https://github.com/henryoier, https://github.com/SherlockNoMad --- torch/nativert/graph/Graph.cpp | 30 ++++++++++++++++++++++++++++++ torch/nativert/graph/Graph.h | 5 +++++ torch/nativert/graph/TensorMeta.h | 5 +++++ 3 files changed, 40 insertions(+) diff --git a/torch/nativert/graph/Graph.cpp b/torch/nativert/graph/Graph.cpp index ee5fbaca11b91..260af58a2a492 100644 --- a/torch/nativert/graph/Graph.cpp +++ b/torch/nativert/graph/Graph.cpp @@ -683,6 +683,36 @@ void Graph::applyDevicePlacement(const Placement& placement) { } } +void Graph::overrideWeightsDevice( + const std::unordered_map>& + submodNameToDevice) { + for (auto& [weightName, weightMeta] : weightsMeta_) { + for (auto& [name, device] : submodNameToDevice) { + if (device.has_value() && weightMeta.device() != device && + c10::starts_with(weightName, name) && + (weightName == name || weightName[name.length()] == '.')) { + LOG(INFO) << "Overriding " << weightName << " from " + << weightMeta.device() << " to device " << device.value(); + weightMeta.setDevice(device.value()); + break; + } + } + } + + for (auto& [tensorName, tensorMeta] : tensorValuesMeta_) { + for (auto& [name, device] : submodNameToDevice) { + if (device.has_value() && tensorMeta.device() != device && + c10::starts_with(tensorName, name) && + (tensorName == name || tensorName[name.length()] == '.')) { + LOG(INFO) << "Overriding " << tensorName << " from " + << tensorMeta.device() << " to device " << device.value(); + tensorMeta.setDevice(device.value()); + break; + } + } + } +} + Node* Graph::nodeAfter(Node* n) { TORCH_CHECK(n->owningGraph() == this); if (n == outputNode_) { diff --git a/torch/nativert/graph/Graph.h b/torch/nativert/graph/Graph.h index a86e973621994..49335ec6aebd9 100644 --- a/torch/nativert/graph/Graph.h +++ b/torch/nativert/graph/Graph.h @@ -442,6 +442,11 @@ class Graph { void applyDevicePlacement(const Placement& placement); + // Override all weights in the graph if matching name is found in the map. 
+ void overrideWeightsDevice( + const std::unordered_map>& + submodNameToDevice); + std::string getUniqueValueName(); ValueId getNextValueId() { diff --git a/torch/nativert/graph/TensorMeta.h b/torch/nativert/graph/TensorMeta.h index 7fe9a88c731af..5d1d39e5d2d60 100644 --- a/torch/nativert/graph/TensorMeta.h +++ b/torch/nativert/graph/TensorMeta.h @@ -64,6 +64,11 @@ class TensorMeta { return device_; } + // override device according to placement + void setDevice(c10::Device device) { + device_ = device; + } + c10::TensorOptions asTensorOptions() const { return c10::TensorOptions().dtype(dtype_).layout(layout_).requires_grad( requiresGrad_); From 29ea6254a066ab59dbf0c9d9d714fc8f95a4458a Mon Sep 17 00:00:00 2001 From: can-gaa-hou Date: Tue, 16 Sep 2025 06:58:59 +0000 Subject: [PATCH 288/693] [Bug] Add more boundary check for FractionalMaxPool3d (#161876) This PR aims to fix the bug mentioned at [#161853](https://github.com/pytorch/pytorch/issues/161853#issuecomment-3240695121) Pull Request resolved: https://github.com/pytorch/pytorch/pull/161876 Approved by: https://github.com/malfet --- aten/src/ATen/native/FractionalMaxPool3d.cpp | 6 ++--- test/nn/test_pooling.py | 25 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/aten/src/ATen/native/FractionalMaxPool3d.cpp b/aten/src/ATen/native/FractionalMaxPool3d.cpp index d1fa7092f5f15..68328018b24b4 100644 --- a/aten/src/ATen/native/FractionalMaxPool3d.cpp +++ b/aten/src/ATen/native/FractionalMaxPool3d.cpp @@ -67,13 +67,13 @@ TORCH_PRECOMPUTE_META_FUNC(fractional_max_pool3d)( int64_t inputH = input_.size(heightDim); int64_t inputW = input_.size(widthDim); - TORCH_CHECK(outputT + poolSizeT - 1 < inputT, + TORCH_CHECK((poolSizeT <= inputT) && (outputT + poolSizeT - 1 < inputT), "fractional_max_pool3d_out(): pool time ", poolSizeT, " too large relative to input time ", inputT); - TORCH_CHECK(outputW + poolSizeW - 1 < inputW, + TORCH_CHECK((poolSizeW <= inputW) && (outputW + poolSizeW - 1 < inputW), "fractional_max_pool3d_out(): pool width ", poolSizeW, " too large relative to input width ", inputW); - TORCH_CHECK(outputH + poolSizeH - 1 < inputH, + TORCH_CHECK((poolSizeH <= inputH) && (outputH + poolSizeH - 1 < inputH), "fractional_max_pool3d_out(): pool height ", poolSizeH, " too large relative to input height ", inputH); diff --git a/test/nn/test_pooling.py b/test/nn/test_pooling.py index a8f77df22d311..3567ecd8e55b9 100644 --- a/test/nn/test_pooling.py +++ b/test/nn/test_pooling.py @@ -662,6 +662,31 @@ def test_FractionalMaxPool3d_errors(self, device): nn.FractionalMaxPool3d( [0, 0, 0], output_size=[1, 1, 1], _random_samples=samples ) + samples = torch.randn(1, 3, 10, 10, 10) + with self.assertRaisesRegex(RuntimeError, "too large relative to"): + nn.FractionalMaxPool3d( + kernel_size=9223372036854775803, + output_size=[1, 1, 1], + )(samples) + with self.assertRaisesRegex(ValueError, "kernel_size must greater than 0"): + nn.FractionalMaxPool3d( + kernel_size=-1, + output_size=[1, 1, 1], + )(samples) + + @onlyNativeDeviceTypes + def test_MaxPool3d_errors(self, device): + samples = torch.randn(1, 3, 10, 10, 10) + with self.assertRaisesRegex(RuntimeError, "integer out of range"): + nn.MaxPool3d( + kernel_size=9223372036854775803, + )(samples) + with self.assertRaisesRegex( + RuntimeError, "kernel size should be greater than zero" + ): + nn.MaxPool3d( + kernel_size=-1, + )(samples) @onlyNativeDeviceTypes def test_MaxPool_zero_batch_dim(self, device): From 76fa381eefaef30c5dd0600897afc303cadab4b9 Mon Sep 17 00:00:00 
2001 From: angelayi Date: Tue, 16 Sep 2025 07:14:31 +0000 Subject: [PATCH 289/693] [mps] Take into account offset (#163021) Fixes issue when running AOTI + MPS on voxtral model Pull Request resolved: https://github.com/pytorch/pytorch/pull/163021 Approved by: https://github.com/malfet --- aten/src/ATen/native/mps/OperationUtils.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aten/src/ATen/native/mps/OperationUtils.mm b/aten/src/ATen/native/mps/OperationUtils.mm index cae9f5de31092..ef42ea6de48c6 100644 --- a/aten/src/ATen/native/mps/OperationUtils.mm +++ b/aten/src/ATen/native/mps/OperationUtils.mm @@ -568,7 +568,7 @@ static void check_mps_shape(MPSShape* shape) { MPSShape* mpsStrides = getMPSShape(_tensor.strides()); check_mps_shape(mpsShape); - auto storage_numel = src.storage().nbytes() / src.element_size(); + auto storage_numel = src.storage().nbytes() / src.element_size() - src.storage_offset(); TORCH_CHECK(storage_numel <= std::numeric_limits::max(), "MPSGaph does not support tensor dims larger than INT_MAX"); MPSNDArrayDescriptor* srcTensorDesc = [MPSNDArrayDescriptor descriptorWithDataType:dataType From 2459da4a641953d6bf4f1419287c38d6c802e39d Mon Sep 17 00:00:00 2001 From: Nicolas De Carli Date: Tue, 16 Sep 2025 07:25:00 +0000 Subject: [PATCH 290/693] [Caffe2] Add float batch box cox SVE128 implementation (#159778) Introduce SVE128 SIMD batch box-cox computation. We've seen about 65% throughput improvement. Privacy Context Container: L1196524 This is a no-op from OSS point of view, therefore it could be landed without tests (see precedence set by https://github.com/pytorch/pytorch/pull/143627), but we should delete those at some point Pull Request resolved: https://github.com/pytorch/pytorch/pull/159778 Approved by: https://github.com/malfet --- caffe2/perfkernels/batch_box_cox_sve128.cc | 243 +++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 caffe2/perfkernels/batch_box_cox_sve128.cc diff --git a/caffe2/perfkernels/batch_box_cox_sve128.cc b/caffe2/perfkernels/batch_box_cox_sve128.cc new file mode 100644 index 0000000000000..3aed8b2c87f25 --- /dev/null +++ b/caffe2/perfkernels/batch_box_cox_sve128.cc @@ -0,0 +1,243 @@ +#if defined(__aarch64__) && defined(CAFFE2_PERF_WITH_SVE128) +#include +#include +#include + +#include "c10/macros/Macros.h" + +// Log and exp approximations inspired from ACL implementation + +inline float32x4_t vtaylor_polyq_for_log_f32(float32x4_t x) { + const float32x4_t log_tab_1 = vdupq_n_f32(-2.29561495781f); + const float32x4_t log_tab_2 = vdupq_n_f32(-2.47071170807f); + const float32x4_t log_tab_3 = vdupq_n_f32(-5.68692588806f); + const float32x4_t log_tab_4 = vdupq_n_f32(-0.165253549814f); + const float32x4_t log_tab_5 = vdupq_n_f32(5.17591238022f); + const float32x4_t log_tab_6 = vdupq_n_f32(0.844007015228f); + const float32x4_t log_tab_7 = vdupq_n_f32(4.58445882797f); + const float32x4_t log_tab_8 = vdupq_n_f32(0.0141278216615f); + + float32x4_t A = vmlaq_f32(log_tab_1, log_tab_5, x); + float32x4_t B = vmlaq_f32(log_tab_3, log_tab_7, x); + float32x4_t C = vmlaq_f32(log_tab_2, log_tab_6, x); + float32x4_t x2 = vmulq_f32(x, x); + float32x4_t D = svget_neonq(svmad_f32_x( + svptrue_b8(), + svset_neonq(svundef_f32(), x), + svset_neonq(svundef_f32(), log_tab_8), + svset_neonq(svundef_f32(), log_tab_4))); + float32x4_t x4 = vmulq_f32(x2, x2); + float32x4_t res = vmlaq_f32(vmlaq_f32(A, B, x2), vmlaq_f32(C, D, x2), x4); + return res; +} + +inline float32x4_t vlogq_f32(float32x4_t x) { + const float32x4_t CONST_LN2 = 
vdupq_n_f32(0.6931471805f); // ln(2) + + // Extract exponent + int32x4_t m = svget_neonq(svsub_n_s32_x( + svptrue_b8(), + svset_neonq( + svundef_s32(), + vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_f32(x), 23))), + 127)); + float32x4_t val = vreinterpretq_f32_s32( + vsubq_s32(vreinterpretq_s32_f32(x), vshlq_n_s32(m, 23))); + + // Polynomial Approximation + float32x4_t poly = vtaylor_polyq_for_log_f32(val); + + // Reconstruct + poly = vmlaq_f32(poly, vcvtq_f32_s32(m), CONST_LN2); + + return poly; +} + +inline float32x4_t vexpq_f32(float32x4_t x) { + const auto c1 = vreinterpretq_f32_u32(svget_neonq(svdup_n_u32(0x3f7ffff6))); + const auto c2 = vreinterpretq_f32_u32(svget_neonq(svdup_n_u32(0x3efffedb))); + const auto c3 = vreinterpretq_f32_u32(svget_neonq(svdup_n_u32(0x3e2aaf33))); + const auto c4 = vreinterpretq_f32_u32(svget_neonq(svdup_n_u32(0x3d2b9f17))); + const auto c5 = vreinterpretq_f32_u32(svget_neonq(svdup_n_u32(0x3c072010))); + + const auto shift = vreinterpretq_f32_u32( + svget_neonq(svdup_n_u32(0x4b00007f))); // 2^23 + 127 = 0x1.0000fep23f + const auto inv_ln2 = vreinterpretq_f32_u32( + svget_neonq(svdup_n_u32(0x3fb8aa3b))); // 1 / ln(2) = 0x1.715476p+0f + const auto neg_ln2_hi = vreinterpretq_f32_u32(svget_neonq( + svdup_n_u32(0xbf317200))); // -ln(2) from bits -1 to -19: -0x1.62e400p-1f + const auto neg_ln2_lo = vreinterpretq_f32_u32(svget_neonq(svdup_n_u32( + 0xb5bfbe8e))); // -ln(2) from bits -20 to -42: -0x1.7f7d1cp-20f + + const auto inf = svdup_n_f32(std::numeric_limits::infinity()); + const auto max_input = svdup_n_f32(88.37f); // Approximately ln(2^127.5) + const auto zero = svdup_n_f32(0.f); + const auto min_input = svdup_n_f32(-86.64f); // Approximately ln(2^-125) + + // Range reduction: + // e^x = 2^n * e^r + // where: + // n = floor(x / ln(2)) + // r = x - n * ln(2) + // + // By adding x / ln(2) with 2^23 + 127 (shift): + // * As FP32 fraction part only has 23-bits, the addition of 2^23 + 127 + // forces decimal part + // of x / ln(2) out of the result. The integer part of x / ln(2) (i.e. n) + // + 127 will occupy the whole fraction part of z in FP32 format. + // Subtracting 2^23 + 127 (shift) from z will result in the integer part + // of x / ln(2) (i.e. n) because the decimal part has been pushed out and + // lost. + // * The addition of 127 makes the FP32 fraction part of z ready to be used + // as the exponent + // in FP32 format. Left shifting z by 23 bits will result in 2^n. + const auto z = vfmaq_f32(shift, x, inv_ln2); + const auto n = z - shift; + const auto scale = + vreinterpretq_f32_u32(vreinterpretq_u32_f32(z) << 23); // 2^n + + // The calculation of n * ln(2) is done using 2 steps to achieve accuracy + // beyond FP32. This outperforms longer Taylor series (3-4 tabs) both in term + // of accuracy and performance. + const auto r_hi = vfmaq_f32(x, n, neg_ln2_hi); + const auto r = vfmaq_f32(r_hi, n, neg_ln2_lo); + + // Compute the truncated Taylor series of e^r. + // poly = scale * (1 + c1 * r + c2 * r^2 + c3 * r^3 + c4 * r^4 + c5 * r^5) + const auto r2 = r * r; + + const auto p1 = c1 * r; + const auto p23 = vfmaq_f32(c2, c3, r); + const auto p45 = vfmaq_f32(c4, c5, r); + const auto p2345 = vfmaq_f32(p23, p45, r2); + const auto p12345 = vfmaq_f32(p1, p2345, r2); + + auto poly = svset_neonq(svundef_f32(), vfmaq_f32(scale, p12345, scale)); + + // Handle underflow and overflow. 
+ poly = svsel_f32( + svcmplt_f32(svptrue_b8(), svset_neonq(svundef_f32(), x), min_input), + zero, + poly); + poly = svsel_f32( + svcmpgt_f32(svptrue_b8(), svset_neonq(svundef_f32(), x), max_input), + inf, + poly); + + return svget_neonq(poly); +} + +// ln(x) = log2(x) * ln(2) +// pow(x, n) = exp(n * ln(x)) +inline float32x4_t compute_batch_box_cox_vec_sve128_float( + svfloat32_t lambda1_v, + svfloat32_t lambda2_v, + svfloat32_t data_v, + svfloat32_t k_eps) { + // sum_v = lambda2_v + data_v + float32x4_t sum_v = vaddq_f32(svget_neonq(data_v), svget_neonq(lambda2_v)); + + // test lambda1_v: predNZ == 1 iff lambda1_v != 0 + svbool_t predNZ = svcmpne_n_f32(svptrue_b8(), lambda1_v, 0.0f); + + // clamp sum_v: sum_v = max(sum_v, k_eps) + sum_v = vmaxq_f32(sum_v, svget_neonq(k_eps)); + + // lnData = log(sum_v) + svfloat32_t lnData = svset_neonq(svundef_f32(), vlogq_f32(sum_v)); + + // if any lambda1 != 0, compute pow(sum_v, lambda1) using lnData + // pow(sum_v, lambda1) == exp(lambda1 * ln(sum_v)) + if (C10_LIKELY(svptest_any(predNZ, predNZ))) { + // mult = lambda1 * ln(sum_v) + float32x4_t mult = vmulq_f32(svget_neonq(lnData), svget_neonq(lambda1_v)); + + // lambda1_r = 1 / lambda1 + svfloat32_t lambda1_r = svdivr_f32_m(predNZ, lambda1_v, svdup_n_f32(1.0f)); + + // pow = exp(mult) + float32x4_t pow = vexpq_f32(mult); + + // merge results + // lnData if lambda1 == 0, (lambda1_r * pow - lambda1_r) if lambda1 != 0 + lnData = svsel_f32(predNZ, lambda1_r, lnData); + lnData = + svnmsb_f32_m(predNZ, lnData, svset_neonq(svundef_f32(), pow), lnData); + } + return svget_neonq(lnData); +} + +template +void compute_batch_box_cox_vec_sve128( + std::size_t N, + std::size_t D, + const T* data_ptr, + const T* __restrict lambda1_ptr, + const T* __restrict lambda2_ptr, + T* output_ptr); + +template <> +void compute_batch_box_cox_vec_sve128( + std::size_t N, + std::size_t D, + const float* data_ptr, + const float* __restrict lambda1_ptr, + const float* __restrict lambda2_ptr, + float* output_ptr) { + svfloat32_t k_eps = svdup_n_f32(static_cast(1e-6)); + + std::size_t remainder = D % 4; + std::size_t loopBound = D - remainder; + svbool_t remainderPred = svwhilelt_b32_u64(0, remainder); + + for (; C10_LIKELY(N > 0); --N) { + for (std::size_t j = 0; C10_LIKELY(j != loopBound); + j += 4, data_ptr += 4, output_ptr += 4) { + svfloat32_t lambda1_v = + svset_neonq(svundef_f32(), vld1q_f32(lambda1_ptr + j)); + svfloat32_t lambda2_v = + svset_neonq(svundef_f32(), vld1q_f32(lambda2_ptr + j)); + svfloat32_t data_v = svset_neonq(svundef_f32(), vld1q_f32(data_ptr)); + float32x4_t result = compute_batch_box_cox_vec_sve128_float( + lambda1_v, lambda2_v, data_v, k_eps); + vst1q_f32(output_ptr, result); + } + if (C10_LIKELY(remainder > 0)) { + svfloat32_t lambda1_v = svld1_f32(remainderPred, lambda1_ptr + loopBound); + svfloat32_t lambda2_v = svld1_f32(remainderPred, lambda2_ptr + loopBound); + svfloat32_t data_v = svld1_f32(remainderPred, data_ptr); + float32x4_t result = compute_batch_box_cox_vec_sve128_float( + lambda1_v, lambda2_v, data_v, k_eps); + svst1_f32(remainderPred, output_ptr, svset_neonq(svundef_f32(), result)); + data_ptr += remainder; + output_ptr += remainder; + } + } +} + +namespace caffe2::details { + +template +void compute_batch_box_cox__sve128( + std::size_t N, + std::size_t D, + const T* self_data, + const T* __restrict lambda1_data, + const T* __restrict lambda2_data, + T* output_data) { + compute_batch_box_cox_vec_sve128( + N, D, self_data, lambda1_data, lambda2_data, output_data); +} + +// Vectorized 
version specializations for float and double +template void compute_batch_box_cox__sve128( + std::size_t N, + std::size_t D, + const float* self_data, + const float* __restrict lambda1_data, + const float* __restrict lambda2_data, + float* output_data); + +} // namespace caffe2::details + +#endif // __aarch64__ && CAFFE2_PERF_WITH_SVE128 From f8d379d29ed093b5f19875de0e33ec007bc3ae9d Mon Sep 17 00:00:00 2001 From: Sherlock Huang Date: Tue, 16 Sep 2025 07:30:01 +0000 Subject: [PATCH 291/693] [DTensor] Introduce DebugMode (#162665) Introduce a lightweight TorchDispatchMode for understanding the magic behind DTensor. - Tracks redistribution, see `redistribute_input(input_idx, from_placement, to_placement)` - Optionally tracks torch-level functions, via `__torch_function__` - Optionally tracks FakeTensor operations, which was needed for propagating tensor meta as a step of sharding propagation - Optionally tracks real tensor operations, including functional c10d op, and regular ops - Calls are shown in the hierarchical structure! - shorthand representation - dt: DTesnor, ft: FakeTensor, t: Tensor - DM(2, 2) == DeviceMesh(shape = [2, 2]) - [R, P, S(0)] == Placement[Replicate, Partial, Shard(0)] - f32[8,8] == float32 with shape[8, 8] ``` debug_mode = DTensorDebugMode(record_faketensor=False, record_realtensor=True) with debug_mode: torch.mm(x_dtensor, y_dtensor) print(debug_mode.debug_string()) ``` produces: ``` torch.mm(dt: f32[8, 8][S(0)], dt: f32[8, 32][S(0)]) aten::mm(dt: f32[8, 8][S(0)], dt: f32[8, 32][S(0)]) redistribute_input(1, [S(0)], [R]) _c10d_functional::all_gather_into_tensor(t: f32[1, 32], 8, 0) _c10d_functional::wait_tensor(t: f32[8, 32]) aten::mm(t: f32[1, 8], t: f32[8, 32]) ``` Another example, for torch.einsum ``` torch.functional.einsum(bld,dnh->blnh, dt: f32[16, 6, 8][P, R], dt: f32[8, 4, 4][R, P]) aten::unsqueeze(dt: f32[16, 6, 8][P, R], 3) aten::unsqueeze(t: f32[16, 6, 8], 3) aten::unsqueeze(dt: f32[16, 6, 8, 1][P, R], 4) aten::unsqueeze(t: f32[16, 6, 8, 1], 4) aten::permute(dt: f32[16, 6, 8, 1, 1][P, R], [0, 1, 3, 4, 2]) aten::permute(t: f32[16, 6, 8, 1, 1], [0, 1, 3, 4, 2]) aten::unsqueeze(dt: f32[8, 4, 4][R, P], 3) aten::unsqueeze(t: f32[8, 4, 4], 3) aten::unsqueeze(dt: f32[8, 4, 4, 1][R, P], 4) aten::unsqueeze(t: f32[8, 4, 4, 1], 4) aten::permute(dt: f32[8, 4, 4, 1, 1][R, P], [3, 4, 1, 2, 0]) aten::permute(t: f32[8, 4, 4, 1, 1], [3, 4, 1, 2, 0]) aten::permute(dt: f32[16, 6, 1, 1, 8][P, R], [0, 1, 4, 2, 3]) aten::permute(t: f32[16, 6, 1, 1, 8], [0, 1, 4, 2, 3]) aten::view(dt: f32[16, 6, 8, 1, 1][P, R], [1, 96, 8]) aten::view(t: f32[16, 6, 8, 1, 1], [1, 96, 8]) aten::permute(dt: f32[1, 1, 4, 4, 8][R, P], [4, 2, 3, 0, 1]) aten::permute(t: f32[1, 1, 4, 4, 8], [4, 2, 3, 0, 1]) aten::view(dt: f32[8, 4, 4, 1, 1][R, P], [1, 8, 16]) aten::view(t: f32[8, 4, 4, 1, 1], [1, 8, 16]) aten::bmm(dt: f32[1, 96, 8][P, R], dt: f32[1, 8, 16][R, P]) redistribute_input(0, [P, R], [S(2), S(2)]) aten::chunk(t: f32[1, 96, 8], 4, 2) aten::cat(['t: f32[1, 96, 2]', 't: f32[1, 96, 2]', 't: f32[1, 96, 2]', 't: f32[1, 96, 2]']) _c10d_functional::reduce_scatter_tensor(t: f32[4, 96, 2], sum, 4, 2) aten::clone(t: f32[1, 96, 1]) redistribute_input(1, [R, P], [S(1), S(1)]) aten::chunk(t: f32[1, 8, 16], 4, 1) aten::clone(t: f32[1, 2, 16]) aten::chunk(t: f32[1, 2, 16], 2, 1) aten::cat(['t: f32[1, 1, 16]', 't: f32[1, 1, 16]']) _c10d_functional::reduce_scatter_tensor(t: f32[2, 1, 16], sum, 2, 3) _c10d_functional::wait_tensor(t: f32[1, 1, 16]) aten::bmm(t: f32[1, 96, 1], t: f32[1, 1, 16]) aten::view(dt: f32[1, 
96, 16][P, P], [16, 6, 1, 4, 4]) aten::view(t: f32[1, 96, 16], [16, 6, 1, 4, 4]) aten::permute(dt: f32[16, 6, 1, 4, 4][P, P], [0, 1, 3, 4, 2]) aten::permute(t: f32[16, 6, 1, 4, 4], [0, 1, 3, 4, 2]) aten::view(dt: f32[16, 6, 4, 4, 1][P, P], [16, 6, 4, 4]) aten::view(t: f32[16, 6, 4, 4, 1], [16, 6, 4, 4]) ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/162665 Approved by: https://github.com/ezyang --- docs/source/conf.py | 2 + docs/source/utils.md | 1 + .../tensor/debug/test_debug_mode.py | 234 ++++++++++++++++++ torch/distributed/tensor/_dispatch.py | 22 +- torch/utils/debug_mode.py | 168 +++++++++++++ 5 files changed, 424 insertions(+), 3 deletions(-) create mode 100644 test/distributed/tensor/debug/test_debug_mode.py create mode 100644 torch/utils/debug_mode.py diff --git a/docs/source/conf.py b/docs/source/conf.py index d1504757f9c54..fe43dae728757 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -3180,6 +3180,8 @@ "WeakIdKeyDictionary", "WeakIdRef", "WeakTensorKeyDictionary", + # torch.utils.debug_mode + "DebugMode", ] # The suffix(es) of source filenames. diff --git a/docs/source/utils.md b/docs/source/utils.md index 6742866a8b25b..d667e12b6aaee 100644 --- a/docs/source/utils.md +++ b/docs/source/utils.md @@ -78,6 +78,7 @@ for tracking purposes --> .. py:module:: torch.utils.data.graph .. py:module:: torch.utils.data.graph_settings .. py:module:: torch.utils.data.sampler +.. py:module:: torch.utils.debug_mode .. py:module:: torch.utils.dlpack .. py:module:: torch.utils.file_baton .. py:module:: torch.utils.flop_counter diff --git a/test/distributed/tensor/debug/test_debug_mode.py b/test/distributed/tensor/debug/test_debug_mode.py new file mode 100644 index 0000000000000..b404b84b167cb --- /dev/null +++ b/test/distributed/tensor/debug/test_debug_mode.py @@ -0,0 +1,234 @@ +# Owner(s): ["oncall: distributed"] + +import contextlib + +import torch +import torch.distributed as dist +from torch._subclasses.fake_tensor import FakeTensorMode +from torch.distributed.tensor import DeviceMesh, DTensor, Partial, Replicate, Shard +from torch.testing._internal.common_utils import ( + instantiate_parametrized_tests, + parametrize, + requires_cuda, + run_tests, + TestCase, +) +from torch.testing._internal.distributed.fake_pg import FakeStore +from torch.utils._python_dispatch import TorchDispatchMode +from torch.utils.debug_mode import DebugMode + + +@requires_cuda +class TestDTensorDebugMode(TestCase): + def tearDown(self): + super().tearDown() + dist.destroy_process_group() + + def setUp(self): + super().setUp() + self.world_size = 8 + store = FakeStore() + dist.init_process_group( + backend="fake", rank=0, world_size=self.world_size, store=store + ) + self.device_type = "cuda" + + def test_debug_mode_mm(self): + mesh = DeviceMesh(self.device_type, list(range(self.world_size))) + + x = torch.randn(1, 8, requires_grad=False) + y = torch.randn(1, 32, requires_grad=True) + x_dtensor = DTensor.from_local(x, mesh, [Shard(0)], run_check=False) + y_dtensor = DTensor.from_local(y, mesh, [Shard(0)], run_check=False) + + with DebugMode() as debug_mode: + torch.mm(x_dtensor, y_dtensor).sum() + + self.assertExpectedInline( + debug_mode.debug_string(), + """\ + torch.mm(dt: f32[8, 8][S(0)], dt: f32[8, 32][S(0)]) + aten::mm(dt: f32[8, 8][S(0)], dt: f32[8, 32][S(0)]) + redistribute_input(1, [S(0)] -> [R]) + _c10d_functional::all_gather_into_tensor(t: f32[1, 32], 8, 0) + _c10d_functional::wait_tensor(t: f32[8, 32]) + aten::mm(t: f32[1, 8], t: f32[8, 32]) + (dt: f32[8, 32][S(0)]) + 
aten::sum(dt: f32[8, 32][S(0)]) + aten::sum(t: f32[1, 32])""", + ) + + def test_debug_mode_backward(self): + mesh = DeviceMesh(self.device_type, list(range(self.world_size))) + + x = torch.randn(1, 8, requires_grad=True) + y = torch.randn(8, 1, requires_grad=True) + x_dtensor = DTensor.from_local(x, mesh, [Shard(0)], run_check=False) + y_dtensor = DTensor.from_local(y, mesh, [Shard(1)], run_check=False) + + with DebugMode() as debug_mode: + z = x_dtensor + y_dtensor + z.sum().backward() + + self.assertExpectedInline( + debug_mode.debug_string(), + """\ + (dt: f32[8, 8][S(0)], dt: f32[8, 8][S(1)]) + aten::add.Tensor(dt: f32[8, 8][S(0)], dt: f32[8, 8][S(1)]) + redistribute_input(1, [S(1)] -> [S(0)]) + _dtensor::shard_dim_alltoall(t: f32[8, 1], 1, 0, 0) + aten::add.Tensor(t: f32[1, 8], t: f32[1, 8]) + (dt: f32[8, 8][S(0)]) + aten::sum(dt: f32[8, 8][S(0)]) + aten::sum(t: f32[1, 8]) + torch._tensor.backward(dt: f32[][P], gradient=None, retain_graph=None, create_graph=False, inputs=None) + aten::ones_like(dt: f32[][P], pin_memory=False, memory_format=torch.preserve_format) + aten::ones_like(t: f32[], pin_memory=False, memory_format=torch.preserve_format) + aten::expand(dt: f32[][R], [8, 8]) + aten::expand(t: f32[], [8, 8]) + aten::split.Tensor(t: f32[8, 8], 1, 1) + aten::clone(t: f32[8, 1]) + aten::_to_copy(t: f32[8, 1], dtype=torch.float32, layout=torch.strided, device=cpu) + aten::detach(t: f32[8, 1]) + aten::split.Tensor(t: f32[8, 8], 1) + aten::clone(t: f32[1, 8]) + aten::_to_copy(t: f32[1, 8], dtype=torch.float32, layout=torch.strided, device=cpu) + aten::detach(t: f32[1, 8])""", + ) + + def test_debug_mode_einsum(self): + mesh = DeviceMesh(self.device_type, torch.arange(self.world_size).view(4, 2)) + + # Create test tensors + a = torch.randn(16, 6, 8) + b = torch.randn(8, 4, 4) + + a_dt = DTensor.from_local(a, mesh, [Partial(), Replicate()], run_check=False) + b_dt = DTensor.from_local(b, mesh, [Replicate(), Partial()], run_check=False) + + # Capture the operator decomposition + with DebugMode() as debug_mode: + torch.einsum("bld,dnh->blnh", a_dt, b_dt) + + self.assertExpectedInline( + debug_mode.debug_string(), + """\ + torch.functional.einsum(bld,dnh->blnh, dt: f32[16, 6, 8][P, R], dt: f32[8, 4, 4][R, P]) + aten::unsqueeze(dt: f32[16, 6, 8][P, R], 3) + aten::unsqueeze(t: f32[16, 6, 8], 3) + aten::unsqueeze(dt: f32[16, 6, 8, 1][P, R], 4) + aten::unsqueeze(t: f32[16, 6, 8, 1], 4) + aten::permute(dt: f32[16, 6, 8, 1, 1][P, R], [0, 1, 3, 4, 2]) + aten::permute(t: f32[16, 6, 8, 1, 1], [0, 1, 3, 4, 2]) + aten::unsqueeze(dt: f32[8, 4, 4][R, P], 3) + aten::unsqueeze(t: f32[8, 4, 4], 3) + aten::unsqueeze(dt: f32[8, 4, 4, 1][R, P], 4) + aten::unsqueeze(t: f32[8, 4, 4, 1], 4) + aten::permute(dt: f32[8, 4, 4, 1, 1][R, P], [3, 4, 1, 2, 0]) + aten::permute(t: f32[8, 4, 4, 1, 1], [3, 4, 1, 2, 0]) + aten::permute(dt: f32[16, 6, 1, 1, 8][P, R], [0, 1, 4, 2, 3]) + aten::permute(t: f32[16, 6, 1, 1, 8], [0, 1, 4, 2, 3]) + aten::view(dt: f32[16, 6, 8, 1, 1][P, R], [1, 96, 8]) + aten::view(t: f32[16, 6, 8, 1, 1], [1, 96, 8]) + aten::permute(dt: f32[1, 1, 4, 4, 8][R, P], [4, 2, 3, 0, 1]) + aten::permute(t: f32[1, 1, 4, 4, 8], [4, 2, 3, 0, 1]) + aten::view(dt: f32[8, 4, 4, 1, 1][R, P], [1, 8, 16]) + aten::view(t: f32[8, 4, 4, 1, 1], [1, 8, 16]) + aten::bmm(dt: f32[1, 96, 8][P, R], dt: f32[1, 8, 16][R, P]) + redistribute_input(0, [P, R] -> [S(2), S(2)]) + aten::chunk(t: f32[1, 96, 8], 4, 2) + aten::cat(['t: f32[1, 96, 2]', 't: f32[1, 96, 2]', 't: f32[1, 96, 2]', 't: f32[1, 96, 2]']) + 
_c10d_functional::reduce_scatter_tensor(t: f32[4, 96, 2], sum, 4, 1) + aten::clone(t: f32[1, 96, 1]) + redistribute_input(1, [R, P] -> [S(1), S(1)]) + aten::chunk(t: f32[1, 8, 16], 4, 1) + aten::clone(t: f32[1, 2, 16]) + aten::chunk(t: f32[1, 2, 16], 2, 1) + aten::cat(['t: f32[1, 1, 16]', 't: f32[1, 1, 16]']) + _c10d_functional::reduce_scatter_tensor(t: f32[2, 1, 16], sum, 2, 3) + _c10d_functional::wait_tensor(t: f32[1, 1, 16]) + aten::bmm(t: f32[1, 96, 1], t: f32[1, 1, 16]) + aten::view(dt: f32[1, 96, 16][P, P], [16, 6, 1, 4, 4]) + aten::view(t: f32[1, 96, 16], [16, 6, 1, 4, 4]) + aten::permute(dt: f32[16, 6, 1, 4, 4][P, P], [0, 1, 3, 4, 2]) + aten::permute(t: f32[16, 6, 1, 4, 4], [0, 1, 3, 4, 2]) + aten::view(dt: f32[16, 6, 4, 4, 1][P, P], [16, 6, 4, 4]) + aten::view(t: f32[16, 6, 4, 4, 1], [16, 6, 4, 4])""", + ) + + def test_real_tensor(self): + x = torch.randn(8, 8, 8) + linear = torch.nn.Linear(8, 8) + + with DebugMode() as debug_mode: + linear(x).sum() + + self.assertExpectedInline( + debug_mode.debug_string(), + """\ + torch._C._nn.linear(t: f32[8, 8, 8], t: f32[8, 8], t: f32[8]) + aten::view(t: f32[8, 8, 8], [64, 8]) + aten::t(t: f32[8, 8]) + aten::addmm(t: f32[8], t: f32[64, 8], t: f32[8, 8]) + aten::view(t: f32[64, 8], [8, 8, 8]) + (t: f32[8, 8, 8]) + aten::sum(t: f32[8, 8, 8])""", + ) + + def test_fake_tensor(self): + with FakeTensorMode(): + x = torch.randn(8, 8) + y = torch.randn(8, 8, 8) + + with DebugMode(record_faketensor=True) as debug_mode: + torch.matmul(y, x) + + self.assertExpectedInline( + debug_mode.debug_string(), + """\ + torch.matmul(ft: f32[8, 8, 8], ft: f32[8, 8]) + aten::view(ft: f32[8, 8, 8], [64, 8]) + aten::mm(ft: f32[64, 8], ft: f32[8, 8]) + aten::_unsafe_view(ft: f32[64, 8], [8, 8, 8])""", + ) + + @parametrize("has_inner_mode", [True, False]) + @parametrize("has_outer_mode", [True, False]) + def test_nested_debug_mode(self, has_inner_mode, has_outer_mode): + class DummyTorchDispatchMode1(TorchDispatchMode): + def __torch_dispatch__(self, func, types, args=(), kwargs=None): + return func(*args, **kwargs) + + class DummyTorchDispatchMode2(TorchDispatchMode): + def __torch_dispatch__(self, func, types, args=(), kwargs=None): + return func(*args, **kwargs) + + mesh = DeviceMesh(self.device_type, list(range(self.world_size))) + + x = torch.randn(1, 8, requires_grad=True) + y = torch.randn(1, 32, requires_grad=True) + x_dtensor = DTensor.from_local(x, mesh, [Shard(0)], run_check=False) + y_dtensor = DTensor.from_local(y, mesh, [Shard(0)], run_check=False) + + inner_mode = ( + DummyTorchDispatchMode1() if has_inner_mode else contextlib.nullcontext() + ) + outer_mode = ( + DummyTorchDispatchMode2() if has_outer_mode else contextlib.nullcontext() + ) + + with outer_mode: + with DebugMode() as debug_mode: + with inner_mode: + torch.mm(x_dtensor, y_dtensor) + + self.assertTrue( + "redistribute_input(1, [S(0)] -> [R])" in debug_mode.debug_string() + ) + + +instantiate_parametrized_tests(TestDTensorDebugMode) + + +if __name__ == "__main__": + run_tests() diff --git a/torch/distributed/tensor/_dispatch.py b/torch/distributed/tensor/_dispatch.py index 2f0a77d4d27d3..9703c412657f8 100644 --- a/torch/distributed/tensor/_dispatch.py +++ b/torch/distributed/tensor/_dispatch.py @@ -23,7 +23,11 @@ ) from torch.distributed.tensor._utils import try_find_mesh_from_args from torch.distributed.tensor.placement_types import Partial, Placement, Replicate -from torch.utils._python_dispatch import return_and_correct_aliasing +from torch.utils._python_dispatch import ( + 
_get_current_dispatch_mode, + return_and_correct_aliasing, +) +from torch.utils.debug_mode import DebugMode try: @@ -334,6 +338,9 @@ def redistribute_local_args( suggested_input_schema: OpSchema, use_val_from_redistribute_schema: bool, ) -> None: + debug_mode = _get_current_dispatch_mode() + in_debug_mode = isinstance(debug_mode, DebugMode) + # NOTE: it's very rare that we need to reshard kwargs so we intentionally skip it if op_info.args_tree_spec is not None: flatten_args_schema_to_reshard = tuple( @@ -348,9 +355,18 @@ def redistribute_local_args( if isinstance(arg_spec, DTensorSpec): local_tensor = cast(torch.Tensor, op_info.local_args[i]) if arg_spec != reshard_arg_spec: - resharded_local_tensor = redistribute_local_tensor( - local_tensor, arg_spec, reshard_arg_spec + redistribute_context = ( + debug_mode.record_redistribute_calls( + i, arg_spec, reshard_arg_spec + ) + if in_debug_mode + else contextlib.nullcontext() ) + + with redistribute_context: + resharded_local_tensor = redistribute_local_tensor( + local_tensor, arg_spec, reshard_arg_spec + ) new_local_args.append(resharded_local_tensor) else: new_local_args.append(local_tensor) diff --git a/torch/utils/debug_mode.py b/torch/utils/debug_mode.py new file mode 100644 index 0000000000000..31cff049ec05b --- /dev/null +++ b/torch/utils/debug_mode.py @@ -0,0 +1,168 @@ +# mypy: allow-untyped-defs +import contextlib + +import torch +import torch.distributed.tensor as dt +from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode +from torch.distributed.tensor._dtensor_spec import DTensorSpec +from torch.utils._dtype_abbrs import dtype_abbrs +from torch.utils._python_dispatch import _get_current_dispatch_mode, TorchDispatchMode +from torch.utils._pytree import tree_map + + +__all__ = ["DebugMode"] + +REDISTRIBUTE_FUNC = "redistribute_input" + + +def _stringify_shape(shape) -> str: + return f"[{', '.join([str(x) for x in shape])}]" + + +def _stringify_device_mesh(mesh) -> str: + return f"DM({', '.join([str(s) for s in mesh.shape])})" + + +def _stringify_placement(placement) -> str: + return f"[{', '.join([str(p) for p in placement])}]" + + +def _tensor_debug_string(tensor) -> str: + """Convert tensor to debug string representation.""" + if isinstance(tensor, dt.DTensor): + # omitted device mesh + return f"dt: {dtype_abbrs[tensor.dtype]}{_stringify_shape(tensor.shape)}{_stringify_placement(tensor.placements)}" + elif isinstance(tensor, FakeTensor): + return f"ft: {dtype_abbrs[tensor.dtype]}{_stringify_shape(tensor.shape)}" + elif isinstance(tensor, torch.Tensor): + return f"t: {dtype_abbrs[tensor.dtype]}{_stringify_shape(tensor.shape)}" + else: + raise RuntimeError(f"Unsupported tensor type: {type(tensor)}") + + +def _arg_to_str(arg) -> str: + def to_str(x): + if isinstance(x, torch.Tensor): + return _tensor_debug_string(x) + elif isinstance(x, DTensorSpec): + return _stringify_placement(x.placements) + return x + + arg = tree_map(to_str, arg) + return str(arg) + + +def _op_to_str(op, *args, **kwargs) -> str: + if op == REDISTRIBUTE_FUNC: + assert len(args) == 3 + _args = [_arg_to_str(arg) for arg in args] + args_str = f"{_args[0]}, {_args[1]} -> {_args[2]}" + else: + args_str = ", ".join(_arg_to_str(arg) for arg in args) + + if kwargs: + kwargs_str = ", " + ", ".join( + f"{k}={_arg_to_str(v)}" for k, v in kwargs.items() + ) + else: + kwargs_str = "" + + if isinstance(op, torch._ops.OpOverload): + op_name = op.__qualname__ + elif hasattr(op, "__module__") and hasattr(op, "__name__"): + op_name = f"{op.__module__}.{op.__name__}" + 
else: + op_name = str(op) + + return f"{op_name}({args_str}{kwargs_str})" + + +class DebugMode(TorchDispatchMode): + def __init__( + self, + *, + record_torchfunction=True, + record_faketensor=False, + record_realtensor=True, + ): + super().__init__() + + self.record_torchfunction = record_torchfunction + self.record_faketensor = record_faketensor + self.record_realtensor = record_realtensor + + self.operators = [] + self.call_depth = 0 + + def __torch_function__(self, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + + self.operators.append((func, args, kwargs, self.call_depth)) + + try: + self.call_depth += 1 + return func(*args, **kwargs) + finally: + self.call_depth -= 1 + + def __torch_dispatch__(self, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + + # Record the operation with its call depth + if dt.DTensor in types: + self.operators.append((func, args, kwargs, self.call_depth)) + return NotImplemented + elif FakeTensor in types or isinstance( + _get_current_dispatch_mode(), FakeTensorMode + ): + if self.record_faketensor: + if func not in {torch.ops.prim.device.default}: + self.operators.append((func, args, kwargs, self.call_depth + 1)) + elif len(types) == 0: + if self.record_realtensor: + self.operators.append((func, args, kwargs, self.call_depth + 1)) + + result = func(*args, **kwargs) + + return result + + def __enter__(self): + self.operators = [] + self.call_depth = 0 + + if self.record_torchfunction: + torch._C._push_on_torch_function_stack(self) + + super().__enter__() + return self + + def __exit__(self, *args): + super().__exit__(*args) + if self.record_torchfunction: + torch._C._pop_torch_function_stack() + + @contextlib.contextmanager + def record_redistribute_calls(self, arg_idx, src_placement, dst_placement): + try: + self.operators.append( + ( + REDISTRIBUTE_FUNC, + [arg_idx, src_placement, dst_placement], + {}, + self.call_depth + 1, + ) + ) + self.call_depth += 1 + yield + finally: + self.call_depth -= 1 + + def debug_string(self) -> str: + result = "" + result += "\n".join( + " " + " " * depth + _op_to_str(op, *args, **kwargs) + for op, args, kwargs, depth in self.operators + ) + return result From fb1e0321da301d30ce5700865266842c08aadc7f Mon Sep 17 00:00:00 2001 From: Eddie Yan Date: Tue, 16 Sep 2025 07:53:16 +0000 Subject: [PATCH 292/693] [CUDA] fix shared memory race in `reduce_kernel` (#162995) Reported by compute-sanitizer, otherwise it looks like `block_y_reduce` and `block_x_reduce` both use `shared_memory` for temporaries without synchronization between them reproduces in e.g., `compute-sanitizer --tool=racecheck python test/test_matmul_cuda.py -k test_scaled_mm_vs_emulated_block_wise_float32_lhs_block_128_rhs_block_1_cuda` (note that this test requires H100 to run unless only the non-emulated (cuBLAS impl.) 
is commented out) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162995 Approved by: https://github.com/msaroufim --- aten/src/ATen/native/cuda/Reduce.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/aten/src/ATen/native/cuda/Reduce.cuh b/aten/src/ATen/native/cuda/Reduce.cuh index 9914ba3a01564..4e1ddb57fc0f4 100644 --- a/aten/src/ATen/native/cuda/Reduce.cuh +++ b/aten/src/ATen/native/cuda/Reduce.cuh @@ -416,6 +416,7 @@ struct ReduceOp { if (config.should_block_y_reduce()) { value = block_y_reduce(value, shared_memory); } + __syncthreads(); if (config.should_block_x_reduce()) { value = block_x_reduce(value, shared_memory); } From de143bf79beb87856afe498b8f8e686f83ad1921 Mon Sep 17 00:00:00 2001 From: FFFrog Date: Tue, 16 Sep 2025 10:59:20 +0800 Subject: [PATCH 293/693] [C10d] Code clean for torch.distributed.init_process_group (#163038) As the title stated. Pull Request resolved: https://github.com/pytorch/pytorch/pull/163038 Approved by: https://github.com/msaroufim --- torch/distributed/distributed_c10d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index c81d9c60eb1fe..2e1e70627284a 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -1735,7 +1735,6 @@ def init_process_group( timeout=timeout, group_desc="default_pg", ) - _update_default_pg(default_pg) else: # backward compatible API if store is None: @@ -1766,7 +1765,8 @@ def init_process_group( device_id=device_id, group_desc="default_pg", ) - _update_default_pg(default_pg) + + _update_default_pg(default_pg) _world.pg_group_ranks[GroupMember.WORLD] = { # type: ignore[index] i: i From 9aca0ba027794f85eba51e981a522e1fb350f126 Mon Sep 17 00:00:00 2001 From: Blaine Burton Rister <145300525+blaine-rister@users.noreply.github.com> Date: Tue, 16 Sep 2025 08:52:43 +0000 Subject: [PATCH 294/693] [Inductor-FX] Support IndexPutFallback (#162863) # Feature This PR supports lowering `IndexPutFallback` through Inductor's FX converter. The approach is very similar to the one taken in https://github.com/pytorch/pytorch/pull/162686. Compared to `ScatterFallback`, this required one additional change: the value of `self.op_overload` for `IndexPutFallback` was inaccurate. Previously, it used `aten.index_put`, which would result in unsound FX IR. The existing Python/C++ codegen use `aten.index_put_`, since the fallback mutates its input. This PR changes `self.op_overload` to match that. # Test plan Added a CI test lowering deterministic index put via the FX converter. 
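
A minimal eager-mode sketch of the behavior this fallback has to preserve (editor's illustration, not part of the PR). The new CI test uses Inductor's internal `DeterministicGuard`; here the public `torch.use_deterministic_algorithms` is used instead, and the tensor shapes and variable names are arbitrary. The point is only that, with `accumulate=True` and repeated indices, every value must be summed into the target slot, which is why the converter now emits the mutating `aten.index_put_.default` rather than `aten.index_put`.

```python
import torch

# Public analogue of the DeterministicGuard used in the new test; under
# deterministic mode Inductor is expected to take the index_put fallback
# rather than generating its own scatter kernel.
torch.use_deterministic_algorithms(True)

length = 8                                   # arbitrary size for illustration
out = torch.zeros(length)
values = torch.ones(length)
indices = (torch.zeros(length, dtype=torch.long),)  # every write targets index 0

# Out-of-place functional form; with accumulate=True all eight ones are
# summed into position 0 of the result.
result = torch.index_put(out, indices, values, accumulate=True)
print(result[0])  # tensor(8.)
```
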
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162863 Approved by: https://github.com/angelayi --- test/inductor/test_fxir_backend.py | 18 ++++++++ torch/_inductor/codegen/cpp_wrapper_cpu.py | 2 +- .../codegen/cpp_wrapper_cpu_array_ref.py | 4 +- torch/_inductor/codegen/wrapper.py | 40 ++++++++++++++++- torch/_inductor/codegen/wrapper_fxir.py | 45 ++++++++++++++++--- torch/_inductor/ir.py | 14 +----- torch/_inductor/lowering.py | 4 +- 7 files changed, 102 insertions(+), 25 deletions(-) diff --git a/test/inductor/test_fxir_backend.py b/test/inductor/test_fxir_backend.py index d2f274f2e412c..e7e857fc0dc3a 100644 --- a/test/inductor/test_fxir_backend.py +++ b/test/inductor/test_fxir_backend.py @@ -591,6 +591,24 @@ def foo(input_): num_fallback = self._count_ops(gm, torch.ops.aten.scatter_.value) self.assertEqual(num_fallback, 1) + def test_index_put_fallback(self): + """ + Test the deterministic fallback for index_put. + """ + length = 8 + out, values = [torch.randn(length, device=self.device) for _ in range(2)] + indices = (torch.randint(length, (length,), device=self.device),) + accumulate = True + with DeterministicGuard(True): + (gm,) = self._compile_and_check( + torch.index_put, + (out, indices, values, accumulate), + expected_num_triton_kernels=1, + ) + + # Check for the fallback op. + self.assertEqual(self._count_ops(gm, torch.ops.aten.index_put_.default), 1) + def test_scatter_reduce_fallback(self): """ Test the customized wrapper codegen for ScatterFallback ops. diff --git a/torch/_inductor/codegen/cpp_wrapper_cpu.py b/torch/_inductor/codegen/cpp_wrapper_cpu.py index df162b806b73a..759eb3da462c6 100644 --- a/torch/_inductor/codegen/cpp_wrapper_cpu.py +++ b/torch/_inductor/codegen/cpp_wrapper_cpu.py @@ -1439,7 +1439,7 @@ def _generate_scatter_fallback( line += ");" self.writeline(line) - def generate_index_put_fallback(self, kernel, x, indices, values, accumulate): + def _generate_index_put_fallback(self, kernel, x, indices, values, accumulate): # TODO: update aoti_torch_index_put_out in ir.py to use autogen out version # See the comment in codegen_reinterpret_view about why having something like # RAIIAtenTensorHandle(tmp_tensor_handle_2) in a tmp array can cause the corresponding diff --git a/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py b/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py index 086a9bc37a6d5..9749d09a1af20 100644 --- a/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py +++ b/torch/_inductor/codegen/cpp_wrapper_cpu_array_ref.py @@ -735,10 +735,12 @@ def _generate_scatter_fallback( line += ");" self.writeline(line) - def generate_index_put_fallback(self, kernel, x, indices, values, accumulate): + def generate_index_put_fallback(self, node: ir.IndexPutFallback) -> None: # No stack allocation when there is a fallback op self.allow_stack_allocation = False + super().generate_index_put_fallback(node) + def _generate_index_put_fallback(self, kernel, x, indices, values, accumulate): self._assert_safe_to_use_borrow_arrayref_tensor_as_tensor() # TODO: update aoti_torch_index_put_out in ir.py to use autogen out version # See the comment in codegen_reinterpret_view about why having something like diff --git a/torch/_inductor/codegen/wrapper.py b/torch/_inductor/codegen/wrapper.py index 6154df7dccf31..0bd059f19565d 100644 --- a/torch/_inductor/codegen/wrapper.py +++ b/torch/_inductor/codegen/wrapper.py @@ -908,6 +908,29 @@ def codegen_fx(self, converter: FxConverter) -> FxConversionFunc: return converter._generate_multi_output +@dataclasses.dataclass 
+class IndexPutFallbackLine(WrapperLine): + wrapper: PythonWrapperCodegen + node: ir.IndexPutFallback + indices: list[Optional[ir.IRNode]] + + def codegen(self, code: IndentedBuffer) -> None: + node = self.node + assert ir.is_node_sequence(node.inputs) + (x, values) = (t.codegen_reference() for t in node.inputs[:2]) + indices = [ + idx.codegen_reference() if idx else self.wrapper.none_str + for idx in self.indices + ] + + self.wrapper._generate_index_put_fallback( + node.get_kernel_name(), x, indices, values, *node.codegen_const_args() + ) + + def codegen_fx(self, converter: FxConverter) -> FxConversionFunc: + return converter._generate_index_put_fallback + + @dataclasses.dataclass class ScatterFallbackLine(WrapperLine): wrapper: PythonWrapperCodegen @@ -1560,7 +1583,22 @@ def _generate_scatter_fallback( line += ")" self.writeline(line) - def generate_index_put_fallback(self, kernel, x, indices, values, accumulate): + def generate_index_put_fallback(self, node: ir.IndexPutFallback) -> None: + # Collect index tensors into a list. + indices: list[Optional[ir.IRNode]] = [] + valid_indices = node.inputs[2:] + iter_valid_indices = iter(valid_indices) + for i, _ in enumerate(node.indices): + if node.indices[i] is not None: + index = next(iter_valid_indices) + assert isinstance(index, ir.IRNode) + indices.append(index) + else: + indices.append(None) + + self.writeline(IndexPutFallbackLine(self, node, indices)) + + def _generate_index_put_fallback(self, kernel, x, indices, values, accumulate): indices_str = f"[{', '.join(indices)}]" args = [x, indices_str, values, accumulate] self.writeline(self.wrap_kernel_call(kernel, args)) diff --git a/torch/_inductor/codegen/wrapper_fxir.py b/torch/_inductor/codegen/wrapper_fxir.py index 133e307096453..9bc7a98fadb63 100644 --- a/torch/_inductor/codegen/wrapper_fxir.py +++ b/torch/_inductor/codegen/wrapper_fxir.py @@ -55,6 +55,7 @@ ExternKernelOutLine, FreeIfNotReusedLine, FreeLine, + IndexPutFallbackLine, KernelCallLine, KernelDefinitionLine, Line, @@ -654,6 +655,42 @@ def _generate_multi_output(self, line: WrapperLine) -> None: node.name = line.result_name self.buffer_to_node[line.result_name] = node + def _generate_fallback_call( + self, + ir_node: ir.ExternKernel, + args: Optional[tuple[Any, ...]] = None, + kwargs: Optional[dict[str, Any]] = None, + ) -> None: + fx_node = self.gm.graph.call_function( + ir_node.op_overload, # type: ignore[arg-type] + args=args, + kwargs=kwargs, + ) + result_buffer = ir_node.codegen_reference() + self.buffer_to_node[result_buffer] = fx_node + + def _generate_index_put_fallback(self, line: WrapperLine) -> None: + assert isinstance(line, IndexPutFallbackLine) + ir_node = line.node + + def generate_buffer_or_none( + x: Union[ir.IRNode, Sequence[ir.IRNode], None], + ) -> Optional[torch.fx.Node]: + """ + Handles None before calling _generate_buffer. 
+ """ + if x is None: + return None + + assert isinstance(x, ir.IRNode) + return self._generate_buffer(x) + + (x, values) = [generate_buffer_or_none(t) for t in ir_node.inputs[:2]] + indices = tuple(generate_buffer_or_none(t) for t in line.indices) + accumulate = ir_node.constant_args[0] + args = (x, indices, values, accumulate) + self._generate_fallback_call(ir_node, args) + def _generate_scatter_fallback(self, line: WrapperLine) -> None: assert isinstance(line, ScatterFallbackLine) ir_node = line.node @@ -666,13 +703,7 @@ def _generate_scatter_fallback(self, line: WrapperLine) -> None: if reduce := ir_node.kwargs.get("reduce"): kwargs["reduce"] = reduce - fx_node = self.gm.graph.call_function( - ir_node.op_overload, # type: ignore[arg-type] - args=args, - kwargs=kwargs, - ) - result_buffer = ir_node.codegen_reference() - self.buffer_to_node[result_buffer] = fx_node + self._generate_fallback_call(ir_node, args, kwargs) def _generate_null(self, line: WrapperLine) -> None: assert isinstance(line, NullLine) diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index 9aece7015b970..6cab868b916b3 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -7115,19 +7115,7 @@ class IndexPutFallback(ExternKernel): """ def codegen(self, wrapper: PythonWrapperCodegen) -> None: - assert is_node_sequence(self.inputs) - (x, values, *valid_indices) = (t.codegen_reference() for t in self.inputs) - indices = [] - iter_valid_indices = iter(valid_indices) - for i, _ in enumerate(self.indices): - if self.indices[i] is not None: - indices.append(next(iter_valid_indices)) - else: - indices.append(V.graph.wrapper_code.none_str) - - wrapper.generate_index_put_fallback( - self.get_kernel_name(), x, indices, values, *self.codegen_const_args() - ) + wrapper.generate_index_put_fallback(self) def should_allocate(self) -> bool: return False diff --git a/torch/_inductor/lowering.py b/torch/_inductor/lowering.py index d05bdd1354694..eec60b31f2eba 100644 --- a/torch/_inductor/lowering.py +++ b/torch/_inductor/lowering.py @@ -3717,8 +3717,8 @@ def index_put_as_masked_fill(self, indices, value, accumulate): def index_put_fallback(self, indices, values, accumulate): - assert isinstance(V.graph.current_node.target, torch._ops.OpOverload) - ir.IndexPutFallback(V.graph.current_node.target, self, indices, values, accumulate) + op_overload = getattr(aten.index_put_, V.graph.current_node.target._overloadname) # type: ignore[union-attr] + ir.IndexPutFallback(op_overload, self, indices, values, accumulate) return self From e3783a9575b810f9a3f51334270668357463958e Mon Sep 17 00:00:00 2001 From: zeshengzong Date: Tue, 16 Sep 2025 09:00:02 +0000 Subject: [PATCH 295/693] Replace `std::runtime_error` with `TORCH_CHECK` (#159344) Fixes part of #148114 Pull Request resolved: https://github.com/pytorch/pytorch/pull/159344 Approved by: https://github.com/cyyever, https://github.com/albanD --- torch/csrc/DataLoader.cpp | 29 +++++---- torch/csrc/Device.cpp | 5 +- .../utils/generated_serialization_types.h | 65 ++++++++++--------- torch/csrc/utils/python_arg_parser.cpp | 51 ++++++--------- torch/csrc/utils/python_numbers.h | 14 ++-- torch/csrc/utils/python_scalars.h | 4 +- torch/csrc/utils/python_strings.h | 12 ++-- torch/csrc/utils/tensor_new.cpp | 14 ++-- torch/csrc/utils/tensor_numpy.cpp | 22 +++---- 9 files changed, 99 insertions(+), 117 deletions(-) diff --git a/torch/csrc/DataLoader.cpp b/torch/csrc/DataLoader.cpp index 216360b63d65f..a6ad3f00b2782 100644 --- a/torch/csrc/DataLoader.cpp +++ b/torch/csrc/DataLoader.cpp @@ 
-62,7 +62,7 @@ static void setSignalHandler( std::ostringstream oss; oss << "An error occurred while setting handler for " << strsignal(signal) << "."; - throw std::runtime_error(oss.str()); + TORCH_CHECK(false, oss.str()); } } @@ -141,29 +141,32 @@ static PyObject* THPModule_errorIfAnyWorkerFails( continue; if (infop.si_code == CLD_EXITED && infop.si_status != EXIT_SUCCESS) { // exit with error - std::ostringstream oss; - oss << "DataLoader worker (pid " << worker_pid << ") exited " - << "unexpectedly with exit code " << infop.si_status << ". " - << "Details are lost due to multiprocessing. Rerunning with " - << "num_workers=0 may give better error trace."; + auto error_msg = fmt::format( + "DataLoader worker (pid {}) exited unexpectedly with exit code {}. " + "Details are lost due to multiprocessing. Rerunning with " + "num_workers=0 may give better error trace.", + worker_pid, + infop.si_status); // This is necessary. Otherwise, the runtime error will kill the other // workers, and trigger this again. pid_set.clear(); - throw std::runtime_error(oss.str()); + TORCH_CHECK(false, error_msg); } else if ( infop.si_code == CLD_KILLED || infop.si_code == CLD_DUMPED) { // killed by signal - std::ostringstream oss; - oss << "DataLoader worker (pid " << worker_pid << ") is killed " - << "by signal: " << strsignal(infop.si_status) << ". "; + auto error_msg = fmt::format( + "DataLoader worker (pid {}) is killed by signal: {}. ", + worker_pid, + strsignal(infop.si_status)); if (infop.si_status == SIGBUS) { - oss << "It is possible that dataloader's workers are out of shared memory. " - << "Please try to raise your shared memory limit."; + error_msg += + "It is possible that dataloader's workers are out of shared memory. " + "Please try to raise your shared memory limit."; } // This is necessary. Otherwise, the runtime error will kill the other // workers, and trigger this again. 
pid_set.clear(); - throw std::runtime_error(oss.str()); + TORCH_CHECK(false, error_msg); } } } diff --git a/torch/csrc/Device.cpp b/torch/csrc/Device.cpp index 53aca5ae8e31b..a86bf6cfa3131 100644 --- a/torch/csrc/Device.cpp +++ b/torch/csrc/Device.cpp @@ -67,10 +67,11 @@ static PyObject* THPDevice_pynew( auto as_device = r.device(0); // this works, because device can take strings if (as_device.has_index()) { auto device_type = r.string(0); - throw std::runtime_error( + TORCH_CHECK( + false, "type (string) must not include an index because index " "was passed explicitly: " + - device_type); + device_type); } int64_t device_index = -1; if (!r.isNone(1)) { diff --git a/torch/csrc/utils/generated_serialization_types.h b/torch/csrc/utils/generated_serialization_types.h index bec4e283dcac8..3090d58f5c094 100644 --- a/torch/csrc/utils/generated_serialization_types.h +++ b/torch/csrc/utils/generated_serialization_types.h @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -190,7 +191,7 @@ inline std::string_view printEnum(const ArgumentKind& e) { case ArgumentKind::POSITIONAL: return "POSITIONAL"; case ArgumentKind::KEYWORD: return "KEYWORD"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -198,7 +199,7 @@ inline void parseEnum(std::string_view s, ArgumentKind& t) { if (s == "UNKNOWN") { t = ArgumentKind::UNKNOWN; return; } if (s == "POSITIONAL") { t = ArgumentKind::POSITIONAL; return; } if (s == "KEYWORD") { t = ArgumentKind::KEYWORD; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } enum class Layout { @@ -223,7 +224,7 @@ inline std::string_view printEnum(const Layout& e) { case Layout::_mkldnn: return "_mkldnn"; case Layout::Strided: return "Strided"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -236,7 +237,7 @@ inline void parseEnum(std::string_view s, Layout& t) { if (s == "SparseBsc") { t = Layout::SparseBsc; return; } if (s == "_mkldnn") { t = Layout::_mkldnn; return; } if (s == "Strided") { t = Layout::Strided; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } enum class MemoryFormat { @@ -255,7 +256,7 @@ inline std::string_view printEnum(const MemoryFormat& e) { case MemoryFormat::ChannelsLast3d: return "ChannelsLast3d"; case MemoryFormat::PreserveFormat: return "PreserveFormat"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -265,7 +266,7 @@ inline void parseEnum(std::string_view s, MemoryFormat& t) { if (s == "ChannelsLast") { t = MemoryFormat::ChannelsLast; return; } if (s == "ChannelsLast3d") { t = MemoryFormat::ChannelsLast3d; return; } if (s == "PreserveFormat") { t = MemoryFormat::PreserveFormat; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } enum class ScalarType { @@ -312,7 +313,7 @@ inline std::string_view printEnum(const ScalarType& e) { case ScalarType::FLOAT8E4M3FNUZ: return "FLOAT8E4M3FNUZ"; case ScalarType::FLOAT8E5M2FNUZ: return "FLOAT8E5M2FNUZ"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -336,7 +337,7 @@ inline void parseEnum(std::string_view s, ScalarType& t) { if (s == "FLOAT8E5M2") { t = ScalarType::FLOAT8E5M2; return; } 
if (s == "FLOAT8E4M3FNUZ") { t = ScalarType::FLOAT8E4M3FNUZ; return; } if (s == "FLOAT8E5M2FNUZ") { t = ScalarType::FLOAT8E5M2FNUZ; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -453,7 +454,7 @@ inline std::string_view printEnum(const SymExprHint::Tag& e) { case SymExprHint::Tag::AS_BOOL: return "AS_BOOL"; case SymExprHint::Tag::AS_FLOAT: return "AS_FLOAT"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -461,7 +462,7 @@ inline void parseEnum(std::string_view s, SymExprHint::Tag& t) { if (s == "AS_INT") { t = SymExprHint::Tag::AS_INT; return; } if (s == "AS_BOOL") { t = SymExprHint::Tag::AS_BOOL; return; } if (s == "AS_FLOAT") { t = SymExprHint::Tag::AS_FLOAT; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -559,14 +560,14 @@ inline std::string_view printEnum(const SymInt::Tag& e) { case SymInt::Tag::AS_EXPR: return "AS_EXPR"; case SymInt::Tag::AS_INT: return "AS_INT"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } inline void parseEnum(std::string_view s, SymInt::Tag& t) { if (s == "AS_EXPR") { t = SymInt::Tag::AS_EXPR; return; } if (s == "AS_INT") { t = SymInt::Tag::AS_INT; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -637,14 +638,14 @@ inline std::string_view printEnum(const SymFloat::Tag& e) { case SymFloat::Tag::AS_EXPR: return "AS_EXPR"; case SymFloat::Tag::AS_FLOAT: return "AS_FLOAT"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } inline void parseEnum(std::string_view s, SymFloat::Tag& t) { if (s == "AS_EXPR") { t = SymFloat::Tag::AS_EXPR; return; } if (s == "AS_FLOAT") { t = SymFloat::Tag::AS_FLOAT; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -715,14 +716,14 @@ inline std::string_view printEnum(const SymBool::Tag& e) { case SymBool::Tag::AS_EXPR: return "AS_EXPR"; case SymBool::Tag::AS_BOOL: return "AS_BOOL"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } inline void parseEnum(std::string_view s, SymBool::Tag& t) { if (s == "AS_EXPR") { t = SymBool::Tag::AS_EXPR; return; } if (s == "AS_BOOL") { t = SymBool::Tag::AS_BOOL; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -865,14 +866,14 @@ inline std::string_view printEnum(const SymIntArgument::Tag& e) { case SymIntArgument::Tag::AS_NAME: return "AS_NAME"; case SymIntArgument::Tag::AS_INT: return "AS_INT"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } inline void parseEnum(std::string_view s, SymIntArgument::Tag& t) { if (s == "AS_NAME") { t = SymIntArgument::Tag::AS_NAME; return; } if (s == "AS_INT") { t = SymIntArgument::Tag::AS_INT; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -943,14 +944,14 @@ inline std::string_view printEnum(const SymFloatArgument::Tag& e) { case SymFloatArgument::Tag::AS_NAME: return "AS_NAME"; case 
SymFloatArgument::Tag::AS_FLOAT: return "AS_FLOAT"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } inline void parseEnum(std::string_view s, SymFloatArgument::Tag& t) { if (s == "AS_NAME") { t = SymFloatArgument::Tag::AS_NAME; return; } if (s == "AS_FLOAT") { t = SymFloatArgument::Tag::AS_FLOAT; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -1021,14 +1022,14 @@ inline std::string_view printEnum(const SymBoolArgument::Tag& e) { case SymBoolArgument::Tag::AS_NAME: return "AS_NAME"; case SymBoolArgument::Tag::AS_BOOL: return "AS_BOOL"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } inline void parseEnum(std::string_view s, SymBoolArgument::Tag& t) { if (s == "AS_NAME") { t = SymBoolArgument::Tag::AS_NAME; return; } if (s == "AS_BOOL") { t = SymBoolArgument::Tag::AS_BOOL; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -1135,14 +1136,14 @@ inline std::string_view printEnum(const OptionalTensorArgument::Tag& e) { case OptionalTensorArgument::Tag::AS_TENSOR: return "AS_TENSOR"; case OptionalTensorArgument::Tag::AS_NONE: return "AS_NONE"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } inline void parseEnum(std::string_view s, OptionalTensorArgument::Tag& t) { if (s == "AS_TENSOR") { t = OptionalTensorArgument::Tag::AS_TENSOR; return; } if (s == "AS_NONE") { t = OptionalTensorArgument::Tag::AS_NONE; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -1769,7 +1770,7 @@ inline std::string_view printEnum(const Argument::Tag& e) { case Argument::Tag::AS_OPTIONAL_TENSOR: return "AS_OPTIONAL_TENSOR"; case Argument::Tag::AS_COMPLEX: return "AS_COMPLEX"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -1801,7 +1802,7 @@ inline void parseEnum(std::string_view s, Argument::Tag& t) { if (s == "AS_SYM_FLOATS") { t = Argument::Tag::AS_SYM_FLOATS; return; } if (s == "AS_OPTIONAL_TENSOR") { t = Argument::Tag::AS_OPTIONAL_TENSOR; return; } if (s == "AS_COMPLEX") { t = Argument::Tag::AS_COMPLEX; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -2127,7 +2128,7 @@ inline std::string_view printEnum(const ConstantValue::Tag& e) { case ConstantValue::Tag::AS_STRING: return "AS_STRING"; case ConstantValue::Tag::AS_BOOL: return "AS_BOOL"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -2137,7 +2138,7 @@ inline void parseEnum(std::string_view s, ConstantValue::Tag& t) { if (s == "AS_FLOAT") { t = ConstantValue::Tag::AS_FLOAT; return; } if (s == "AS_STRING") { t = ConstantValue::Tag::AS_STRING; return; } if (s == "AS_BOOL") { t = ConstantValue::Tag::AS_BOOL; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -2465,7 +2466,7 @@ inline std::string_view printEnum(const InputSpec::Tag& e) { case InputSpec::Tag::TOKEN: return "TOKEN"; case InputSpec::Tag::CONSTANT_INPUT: return "CONSTANT_INPUT"; default: - throw std::runtime_error("Unknown enum 
value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -2477,7 +2478,7 @@ inline void parseEnum(std::string_view s, InputSpec::Tag& t) { if (s == "CUSTOM_OBJ") { t = InputSpec::Tag::CUSTOM_OBJ; return; } if (s == "TOKEN") { t = InputSpec::Tag::TOKEN; return; } if (s == "CONSTANT_INPUT") { t = InputSpec::Tag::CONSTANT_INPUT; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } @@ -2851,7 +2852,7 @@ inline std::string_view printEnum(const OutputSpec::Tag& e) { case OutputSpec::Tag::TOKEN: return "TOKEN"; case OutputSpec::Tag::PARAMETER_MUTATION: return "PARAMETER_MUTATION"; default: - throw std::runtime_error("Unknown enum value"); + TORCH_CHECK(false, "Unknown enum value"); } } @@ -2864,7 +2865,7 @@ inline void parseEnum(std::string_view s, OutputSpec::Tag& t) { if (s == "USER_INPUT_MUTATION") { t = OutputSpec::Tag::USER_INPUT_MUTATION; return; } if (s == "TOKEN") { t = OutputSpec::Tag::TOKEN; return; } if (s == "PARAMETER_MUTATION") { t = OutputSpec::Tag::PARAMETER_MUTATION; return; } - throw std::runtime_error("Unknown enum value: " + std::string{s}); + TORCH_CHECK(false, "Unknown enum value: " + std::string{s}); } diff --git a/torch/csrc/utils/python_arg_parser.cpp b/torch/csrc/utils/python_arg_parser.cpp index 613657e03b926..53cee2632b11f 100644 --- a/torch/csrc/utils/python_arg_parser.cpp +++ b/torch/csrc/utils/python_arg_parser.cpp @@ -131,9 +131,8 @@ FunctionParameter::FunctionParameter(const std::string& fmt, bool keyword_only) size(0), default_scalar(0) { auto space = fmt.find(' '); - if (space == std::string::npos) { - throw std::runtime_error("FunctionParameter(): missing type: " + fmt); - } + TORCH_CHECK( + space != std::string::npos, "FunctionParameter(): missing type: " + fmt); auto type_str = fmt.substr(0, space); @@ -154,10 +153,9 @@ FunctionParameter::FunctionParameter(const std::string& fmt, bool keyword_only) auto name_str = fmt.substr(space + 1); auto it = type_map.find(type_str); - if (it == type_map.end()) { - throw std::runtime_error( - "FunctionParameter(): invalid type string: " + type_str); - } + TORCH_CHECK( + it != type_map.end(), + "FunctionParameter(): invalid type string: " + type_str); type_ = it->second; auto eq = name_str.find('='); @@ -1145,7 +1143,7 @@ auto FunctionParameter::_check( case ParameterType::DISPATCH_KEY_SET: return py::isinstance(py::handle(obj)); default: - throw std::runtime_error("unknown parameter type"); + TORCH_CHECK(false, "unknown parameter type"); } } @@ -1202,7 +1200,7 @@ std::string FunctionParameter::type_name() const { case ParameterType::DISPATCH_KEY_SET: return "DispatchKeySet"; default: - throw std::runtime_error("unknown parameter type"); + TORCH_CHECK(false, "unknown parameter type"); } } @@ -1324,10 +1322,8 @@ void FunctionParameter::set_default_str(const std::string& str) { } if (type_ == ParameterType::TENSOR || type_ == ParameterType::DISPATCH_KEY_SET) { - if (str != "None") { - throw std::runtime_error( - "default value for Tensor must be none, got: " + str); - } + TORCH_CHECK( + str == "None", "default value for Tensor must be none, got: " + str); } else if (type_ == ParameterType::INT64 || type_ == ParameterType::SYM_INT) { default_int = atol(str.c_str()); } else if (type_ == ParameterType::BOOL) { @@ -1351,16 +1347,14 @@ void FunctionParameter::set_default_str(const std::string& str) { default_intlist = parse_intlist_args(str, size); } } else if (type_ == ParameterType::FLOAT_LIST) { - if (str != "None") { - throw 
std::runtime_error("Defaults not supported for float[]"); - } + TORCH_CHECK(str == "None", "Defaults not supported for float[]"); } else if (type_ == ParameterType::SCALARTYPE) { if (str == "None") { default_scalartype = at::ScalarType::Undefined; } else if (str == "torch.int64") { default_scalartype = at::ScalarType::Long; } else { - throw std::runtime_error("invalid default value for ScalarType: " + str); + TORCH_CHECK(false, "invalid default value for ScalarType: " + str); } } else if (type_ == ParameterType::LAYOUT) { if (str == "None") { @@ -1370,16 +1364,12 @@ void FunctionParameter::set_default_str(const std::string& str) { } else if (str == "torch.sparse_coo") { default_layout = at::Layout::Sparse; } else { - throw std::runtime_error("invalid default value for layout: " + str); + TORCH_CHECK(false, "invalid default value for layout: " + str); } } else if (type_ == ParameterType::DEVICE) { - if (str != "None") { - throw std::runtime_error("invalid device: " + str); - } + TORCH_CHECK(str == "None", "invalid device: " + str); } else if (type_ == ParameterType::STREAM) { - if (str != "None") { - throw std::runtime_error("invalid stream: " + str); - } + TORCH_CHECK(str == "None", "invalid stream: " + str); } else if (type_ == ParameterType::STRING) { if (str != "None") { default_string = parse_string_literal(str); @@ -1408,7 +1398,7 @@ void FunctionParameter::set_default_str(const std::string& str) { } else if (type_ == ParameterType::QSCHEME) { // NOLINT // throw std::runtime_error("ParameterType::QSCHEME"); } else { - throw std::runtime_error("unknown parameter type"); + TORCH_CHECK(false, "unknown parameter type"); } default_value = str; } @@ -1423,7 +1413,7 @@ FunctionSignature::FunctionSignature(const std::string& fmt, int index) deprecated(false) { auto open_paren = fmt.find('('); if (open_paren == std::string::npos) { - throw std::runtime_error("missing opening parenthesis: " + fmt); + TORCH_CHECK(false, "missing opening parenthesis: " + fmt); } name = fmt.substr(0, open_paren); @@ -1445,12 +1435,9 @@ FunctionSignature::FunctionSignature(const std::string& fmt, int index) break; } } - if (offset == std::string::npos) { - throw std::runtime_error("missing closing parenthesis: " + fmt); - } - if (offset == last_offset) { - throw std::runtime_error("malformed signature: " + fmt); - } + TORCH_CHECK( + offset != std::string::npos, "missing closing parenthesis: " + fmt); + TORCH_CHECK(offset != last_offset, "malformed signature: " + fmt); auto param_str = fmt.substr(last_offset, offset - last_offset); last_offset = next_offset; diff --git a/torch/csrc/utils/python_numbers.h b/torch/csrc/utils/python_numbers.h index a8b9b8632a00b..c7e6cc29bf783 100644 --- a/torch/csrc/utils/python_numbers.h +++ b/torch/csrc/utils/python_numbers.h @@ -120,7 +120,7 @@ inline bool THPUtils_unpackBool(PyObject* obj) { } else if (obj == Py_False) { return false; } else { - throw std::runtime_error("couldn't convert python object to boolean"); + TORCH_CHECK(false, "couldn't convert python object to boolean"); } } @@ -199,13 +199,11 @@ inline c10::DeviceIndex THPUtils_unpackDeviceIndex(PyObject* obj) { if (value == -1 && PyErr_Occurred()) { throw python_error(); } - if (overflow != 0) { - throw std::runtime_error("Overflow when unpacking DeviceIndex"); - } - if (value > std::numeric_limits::max() || - value < std::numeric_limits::min()) { - throw std::runtime_error("Overflow when unpacking DeviceIndex"); - } + TORCH_CHECK(overflow == 0, "Overflow when unpacking DeviceIndex"); + TORCH_CHECK( + value <= 
std::numeric_limits::max() && + value >= std::numeric_limits::min(), + "Overflow when unpacking DeviceIndex"); return (c10::DeviceIndex)value; } diff --git a/torch/csrc/utils/python_scalars.h b/torch/csrc/utils/python_scalars.h index 89ce38353bebc..c5e19f8855d5e 100644 --- a/torch/csrc/utils/python_scalars.h +++ b/torch/csrc/utils/python_scalars.h @@ -101,7 +101,7 @@ inline void store_scalar(void* data, at::ScalarType scalarType, PyObject* obj) { at::convert(THPUtils_unpackDouble(obj)); break; default: - throw std::runtime_error("store_scalar: invalid type"); + TORCH_CHECK(false, "store_scalar: invalid type"); } } @@ -165,7 +165,7 @@ inline PyObject* load_scalar(const void* data, at::ScalarType scalarType) { return PyFloat_FromDouble( at::convert(*(at::Float8_e8m0fnu*)data)); default: - throw std::runtime_error("load_scalar: invalid type"); + TORCH_CHECK(false, "load_scalar: invalid type"); } } diff --git a/torch/csrc/utils/python_strings.h b/torch/csrc/utils/python_strings.h index 1d26c4333bc2b..229734af238f4 100644 --- a/torch/csrc/utils/python_strings.h +++ b/torch/csrc/utils/python_strings.h @@ -26,12 +26,10 @@ inline std::string THPUtils_unpackString(PyObject* obj) { if (PyUnicode_Check(obj)) { Py_ssize_t size = 0; const char* data = PyUnicode_AsUTF8AndSize(obj, &size); - if (!data) { - throw std::runtime_error("error unpacking string as utf-8"); - } + TORCH_CHECK(data, "error unpacking string as utf-8"); return std::string(data, (size_t)size); } - throw std::runtime_error("unpackString: expected bytes or unicode object"); + TORCH_CHECK(false, "unpackString: expected bytes or unicode object"); } // Unpacks PyBytes (PyString) or PyUnicode as std::string_view @@ -50,12 +48,10 @@ inline std::string_view THPUtils_unpackStringView(PyObject* obj) { if (PyUnicode_Check(obj)) { Py_ssize_t size = 0; const char* data = PyUnicode_AsUTF8AndSize(obj, &size); - if (!data) { - throw std::runtime_error("error unpacking string as utf-8"); - } + TORCH_CHECK(data, "error unpacking string as utf-8"); return std::string_view(data, (size_t)size); } - throw std::runtime_error("unpackString: expected bytes or unicode object"); + TORCH_CHECK(false, "unpackString: expected bytes or unicode object"); } inline PyObject* THPUtils_packString(const char* str) { diff --git a/torch/csrc/utils/tensor_new.cpp b/torch/csrc/utils/tensor_new.cpp index 35511300f703e..77a22568a3fd3 100644 --- a/torch/csrc/utils/tensor_new.cpp +++ b/torch/csrc/utils/tensor_new.cpp @@ -689,7 +689,7 @@ Tensor legacy_sparse_tensor_generic_ctor_new( return new_with_sizes( options, scalar_type, deviceOptional, r.symintlist(0)); } - throw std::runtime_error("new(): invalid arguments"); + TORCH_CHECK(false, "new(): invalid arguments"); } // NB: device_idx here is NOT a DeviceIndex, but index into PythonArgs @@ -808,7 +808,7 @@ static Tensor legacy_tensor_generic_ctor_new( return legacy_new_from_sequence( options, scalar_type, deviceOptional, r.pyobject(0)); } - throw std::runtime_error("new(): invalid arguments"); + TORCH_CHECK(false, "new(): invalid arguments"); } // Handles ONLY torch.Tensor @@ -1072,7 +1072,7 @@ static Tensor sparse_compressed_tensor_ctor_worker( values.options().layout(layout).pinned_memory(pin_memory)) .set_requires_grad(r.toBool(ARG_REQUIRES_GRAD1)); } - throw std::runtime_error(name + ": invalid arguments"); + TORCH_CHECK(false, name + ": invalid arguments"); } Tensor sparse_compressed_tensor_ctor( @@ -1274,7 +1274,7 @@ Tensor sparse_coo_tensor_ctor( inferred_options.dtype(inferred_scalar_type).layout(at::kSparse)) 
.set_requires_grad(r.toBool(ARG_REQUIRES_GRAD2)); } - throw std::runtime_error("sparse_coo_tensor(): invalid arguments"); + TORCH_CHECK(false, "sparse_coo_tensor(): invalid arguments"); } void _validate_sparse_coo_tensor_args( @@ -1504,7 +1504,7 @@ Tensor tensor_ctor( new_tensor.set_requires_grad(args_requires_grad); return new_tensor; } - throw std::runtime_error("tensor(): invalid arguments"); + TORCH_CHECK(false, "tensor(): invalid arguments"); } Tensor as_tensor( @@ -1523,7 +1523,7 @@ Tensor as_tensor( /*copy_numpy=*/false, /*type_inference=*/type_inference); } - throw std::runtime_error("tensor(): invalid arguments"); + TORCH_CHECK(false, "tensor(): invalid arguments"); } Tensor new_tensor( @@ -1561,7 +1561,7 @@ Tensor new_tensor( new_tensor.set_requires_grad(args_requires_grad); return new_tensor; } - throw std::runtime_error("new_tensor(): invalid arguments"); + TORCH_CHECK(false, "new_tensor(): invalid arguments"); } Tensor tensor_frombuffer( diff --git a/torch/csrc/utils/tensor_numpy.cpp b/torch/csrc/utils/tensor_numpy.cpp index b9839a79f6110..c6cd56711c4a1 100644 --- a/torch/csrc/utils/tensor_numpy.cpp +++ b/torch/csrc/utils/tensor_numpy.cpp @@ -9,32 +9,32 @@ namespace torch::utils { PyObject* tensor_to_numpy(const at::Tensor&, bool) { - throw std::runtime_error("PyTorch was compiled without NumPy support"); + TORCH_CHECK(false, "PyTorch was compiled without NumPy support"); } at::Tensor tensor_from_numpy( PyObject* obj, bool warn_if_not_writeable /*=true*/) { - throw std::runtime_error("PyTorch was compiled without NumPy support"); + TORCH_CHECK(false, "PyTorch was compiled without NumPy support"); } bool is_numpy_available() { - throw std::runtime_error("PyTorch was compiled without NumPy support"); + TORCH_CHECK(false, "PyTorch was compiled without NumPy support"); } bool is_numpy_int(PyObject* obj) { - throw std::runtime_error("PyTorch was compiled without NumPy support"); + TORCH_CHECK(false, "PyTorch was compiled without NumPy support"); } bool is_numpy_scalar(PyObject* obj) { - throw std::runtime_error("PyTorch was compiled without NumPy support"); + TORCH_CHECK(false, "PyTorch was compiled without NumPy support"); } at::Tensor tensor_from_cuda_array_interface( PyObject* obj, std::optional device_opt) { - throw std::runtime_error("PyTorch was compiled without NumPy support"); + TORCH_CHECK(false, "PyTorch was compiled without NumPy support"); } void warn_numpy_not_writeable() { - throw std::runtime_error("PyTorch was compiled without NumPy support"); + TORCH_CHECK(false, "PyTorch was compiled without NumPy support"); } // No-op stubs. 
@@ -215,9 +215,7 @@ void warn_numpy_not_writeable() { at::Tensor tensor_from_numpy( PyObject* obj, bool warn_if_not_writeable /*=true*/) { - if (!is_numpy_available()) { - throw std::runtime_error("Numpy is not available"); - } + TORCH_CHECK(is_numpy_available(), "Numpy is not available"); TORCH_CHECK_TYPE( PyArray_Check(obj), "expected np.ndarray (got ", @@ -385,9 +383,7 @@ bool is_numpy_scalar(PyObject* obj) { at::Tensor tensor_from_cuda_array_interface( PyObject* obj, std::optional device_opt) { - if (!is_numpy_available()) { - throw std::runtime_error("Numpy is not available"); - } + TORCH_CHECK(is_numpy_available(), "Numpy is not available"); auto cuda_dict = THPObjectPtr(PyObject_GetAttrString(obj, "__cuda_array_interface__")); TORCH_INTERNAL_ASSERT(cuda_dict); From 6926710adf697e9d2160d43c4a96212dd27ceae0 Mon Sep 17 00:00:00 2001 From: Aidyn-A Date: Tue, 16 Sep 2025 10:29:55 +0000 Subject: [PATCH 296/693] [ATen][CUDA] CUTLASS matmuls: add sm_103a flag (#162956) This PR adds an `sm_103a` flag for GroupMM and RowwiseScaledMM. Contrary to just #161399, this simply adds the flag as the support for `sm_103a` matmuls is going to be added to CUTLASS v4.2 (see https://github.com/pytorch/pytorch/pull/161399#issuecomment-3252892937). Pull Request resolved: https://github.com/pytorch/pytorch/pull/162956 Approved by: https://github.com/eqy, https://github.com/Skylion007 --- cmake/Codegen.cmake | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cmake/Codegen.cmake b/cmake/Codegen.cmake index e4973c849a18f..55d03b7c46320 100644 --- a/cmake/Codegen.cmake +++ b/cmake/Codegen.cmake @@ -107,6 +107,12 @@ if(INTERN_BUILD_ATEN_OPS) list(APPEND _file_compile_flags "-gencode;arch=compute_100a,code=sm_100a") endif() endif() + # We will need to gate against CUDA version, because sm_103a is available on CUDA 12.9+ + if("${_arch}" STREQUAL "103a" AND CUDA_VERSION VERSION_GREATER_EQUAL 12.9) + if(_existing_arch_flags MATCHES ".*compute_100.*") + list(APPEND _file_compile_flags "-gencode;arch=compute_103a,code=sm_103a") + endif() + endif() if("${_arch}" STREQUAL "120a") if(_existing_arch_flags MATCHES ".*compute_120.*") list(APPEND _file_compile_flags "-gencode;arch=compute_120a,code=sm_120a") @@ -120,13 +126,13 @@ if(INTERN_BUILD_ATEN_OPS) _BUILD_FOR_ADDITIONAL_ARCHS( "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/RowwiseScaledMM.cu" - "89;90a;100a;120a") + "89;90a;100a;103a;120a") _BUILD_FOR_ADDITIONAL_ARCHS( "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/ScaledGroupMM.cu" "90a") _BUILD_FOR_ADDITIONAL_ARCHS( "${CMAKE_CURRENT_LIST_DIR}/../aten/src/ATen/native/cuda/GroupMM.cu" - "90a;100a") + "90a;100a;103a") endif() From fa127d9b20720b70c6481ee9c19693714c428446 Mon Sep 17 00:00:00 2001 From: zeshengzong Date: Tue, 16 Sep 2025 12:07:46 +0000 Subject: [PATCH 297/693] Fix `LBFGS` wolfe max iteration (#161488) Fixes #91581 , based on #135026 ## Test Result ```bash pytest test/test_optim.py ......... 
========================== 1473 passed, 242 skipped in 2412.49s (0:40:12) =========================== ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/161488 Approved by: https://github.com/albanD --- test/test_optim.py | 28 ++++++++++++++++++++++++++++ torch/optim/lbfgs.py | 9 ++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/test/test_optim.py b/test/test_optim.py index 6dd23d6328c89..de185725b5c2c 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -2305,6 +2305,34 @@ def test_non_empty_state(self, device, dtype, optim_info): for state in optim.state.values(): self.assertGreater(len(state), 0) + @parametrize("dtype", [torch.float32]) + def test_step_iteration(self, device, dtype): + def _get_model_and_input_tensor(device, dtype): + model = torch.nn.Sequential( + torch.nn.Conv2d(4, 2, 1, stride=2), + torch.nn.BatchNorm2d(2, eps=1e-05, momentum=0.1), + ) + input = torch.rand(1, 4, 16, 16, device=device, dtype=dtype) + model.to(dtype=dtype, device=device) + return model, input + + counter = 0 + + def fwd_bwd(optim, mod, i): + nonlocal counter + counter += 1 + optim.zero_grad() + loss = mod(i).sum() + loss.backward() + return loss + + model, input = _get_model_and_input_tensor(device, dtype) + optimizer = torch.optim.LBFGS( + model.parameters(), max_iter=1, max_eval=5, line_search_fn="strong_wolfe" + ) + optimizer.step(functools.partial(fwd_bwd, optimizer, model, input)) + self.assertEqual(counter, 6) + instantiate_device_type_tests(TestOptimRenewed, globals(), allow_mps=True) diff --git a/torch/optim/lbfgs.py b/torch/optim/lbfgs.py index 674aaaf268835..09f5f2ca8c882 100644 --- a/torch/optim/lbfgs.py +++ b/torch/optim/lbfgs.py @@ -442,7 +442,14 @@ def obj_func(x, t, d): return self._directional_evaluate(closure, x, t, d) loss, flat_grad, t, ls_func_evals = _strong_wolfe( - obj_func, x_init, t, d, loss, flat_grad, gtd + obj_func, + x_init, + t, + d, + loss, + flat_grad, + gtd, + max_ls=max_eval - current_evals, ) self._add_grad(t, d) opt_cond = flat_grad.abs().max() <= tolerance_grad From cef815dc2ce37f98e01a6469a15b69f15995c1f9 Mon Sep 17 00:00:00 2001 From: Xinya Zhang Date: Tue, 16 Sep 2025 12:48:45 +0000 Subject: [PATCH 298/693] [ROCm] Remove HIPBLASLT_ALLOW_TF32 from codebase (#162998) A few UT failures are caused by `HIPBLASLT_ALLOW_TF32` Fixes #157094, #157093, #157092, #157091, #157064, #157063, #157062, #157061, #157042, #157041, #157039, #157004 Pull Request resolved: https://github.com/pytorch/pytorch/pull/162998 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily --- aten/src/ATen/Context.cpp | 21 +------- test/dynamo/test_graph_region_tracker.py | 62 +++++++++--------------- test/dynamo/test_misc.py | 55 +++++++-------------- test/inductor/test_flex_decoding.py | 3 -- test/inductor/test_padding.py | 3 -- test/test_cuda.py | 52 -------------------- test/test_linalg.py | 30 +----------- test/test_transformers.py | 7 ++- torch/cuda/tunable.py | 1 - torch/testing/_internal/common_cuda.py | 22 +++------ 10 files changed, 54 insertions(+), 202 deletions(-) diff --git a/aten/src/ATen/Context.cpp b/aten/src/ATen/Context.cpp index 4d48084b0ab89..7a8d02be530e3 100644 --- a/aten/src/ATen/Context.cpp +++ b/aten/src/ATen/Context.cpp @@ -180,7 +180,7 @@ void Context::setUserEnabledNNPACK(bool e) { } bool Context::allowTF32CuDNN(const std::string& op) const { - if (op.size() == 0){ + if (op.empty()){ bool allow_tf32_rnn = float32Precision("cuda", "rnn") == "tf32"; bool allow_tf32_conv = float32Precision("cuda", "conv") == "tf32"; 
TORCH_CHECK( @@ -281,9 +281,6 @@ bool Context::userEnabledOverrideableSDP() const { static constexpr const auto cublas_config_var_name = "CUBLAS_WORKSPACE_CONFIG"; static constexpr const std::array cublas_deterministic_configs = {":4096:8", ":16:8"}; -#ifdef USE_ROCM -static constexpr const auto hipblaslt_allow_tf32 = "HIPBLASLT_ALLOW_TF32"; -#endif bool Context::checkCuBLASConfigDeterministic() { // If using CUDA 10.2 or greater, need to make sure CuBLAS workspace config @@ -343,12 +340,6 @@ void Context::setImmediateMiopen(bool b) { } bool Context::allowTF32CuBLAS() const { -#ifdef USE_ROCM - const auto allow_tf32 = c10::utils::check_env(hipblaslt_allow_tf32); - if (allow_tf32 != true) { - return false; - } -#endif bool legacy_allow_tf32 = float32_matmul_precision != at::Float32MatmulPrecision::HIGHEST; bool allow_tf32_new = float32Precision("cuda", "matmul") == "tf32"; TORCH_CHECK( @@ -362,14 +353,6 @@ bool Context::allowTF32CuBLAS() const { } void Context::setAllowTF32CuBLAS(bool b) { -#ifdef USE_ROCM - const auto allow_tf32 = c10::utils::check_env(hipblaslt_allow_tf32); - if (allow_tf32 != true) { - C10_LOG_FIRST_N(INFO, 10) << "torch.backends.cuda.matmul.allow_tf32 is not supported on ROCm by default. " - << "Please set environment variable HIPBLASLT_ALLOW_TF32=1 to enable it."; - return; - } -#endif float32_matmul_precision = b ? at::Float32MatmulPrecision::HIGH : at::Float32MatmulPrecision::HIGHEST; setFloat32Precision("cuda", "matmul", b ? "tf32" : "ieee"); } @@ -443,7 +426,7 @@ void Context::setFloat32Precision(const std::string& backend, const std::string& std::string msg; auto iterp = _fp32_precisions.find(backend); TORCH_CHECK(iterp != _fp32_precisions.end()); - for (auto p : iterp->second) { + for (const auto& p : iterp->second) { msg += p; msg += " "; } diff --git a/test/dynamo/test_graph_region_tracker.py b/test/dynamo/test_graph_region_tracker.py index e930ff787a9a4..ce456596fd55e 100644 --- a/test/dynamo/test_graph_region_tracker.py +++ b/test/dynamo/test_graph_region_tracker.py @@ -1,6 +1,5 @@ # Owner(s): ["module: dynamo"] import contextlib -import os import torch import torch.fx @@ -196,21 +195,6 @@ def fn(x, y, z): ) def test_mismatched_global_state(self): - @contextlib.contextmanager - def _hip_allow_tf32(): - # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new - # and only for MI300+ - hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None) - os.environ["HIPBLASLT_ALLOW_TF32"] = "1" - - try: - yield - finally: - if hip_allow_tf32 is not None: - os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32 - else: - del os.environ["HIPBLASLT_ALLOW_TF32"] - def inner_fn(x, y): x1 = x * 1 y1 = y + 1 @@ -251,31 +235,29 @@ def set_default_dtype_bfloat16(): def reset_default_dtype(): torch.set_default_dtype(old_dtype) - tf32_ctx = _hip_allow_tf32 if torch.version.hip else contextlib.nullcontext - with tf32_ctx(): - for ctx in [ - lambda: torch.set_grad_enabled(False), - torch.autograd.grad_mode.inference_mode, - lambda: torch.autograd.graph.disable_saved_tensors_hooks( - "This is not supported" - ), - # lambda: torch.set_num_threads(2), : Unsupported - (set_default_dtype_bfloat16, reset_default_dtype), - ( - lambda: torch.use_deterministic_algorithms(True), - lambda: torch.use_deterministic_algorithms(False), - ), - # (lambda: torch.use_deterministic_algorithms(True, warn_only=True), - # lambda: torch.use_deterministic_algorithms(False)), : Unsupported - create_toggle_fns("allow_bf16_reduced_precision_reduction"), - 
create_toggle_fns("allow_fp16_reduced_precision_reduction"), - create_toggle_fns("allow_tf32"), - ]: - self.assertExpectedInline( - self.get_result(fn, torch.rand(10, 10), torch.ones(10, 20), ctx), - """[[['x1_2', 'y1_2', 'sum_3', 'o0'], ['x1_3', 'y1_3', 'sum_4', 'o2']], \ + for ctx in [ + lambda: torch.set_grad_enabled(False), + torch.autograd.grad_mode.inference_mode, + lambda: torch.autograd.graph.disable_saved_tensors_hooks( + "This is not supported" + ), + # lambda: torch.set_num_threads(2), : Unsupported + (set_default_dtype_bfloat16, reset_default_dtype), + ( + lambda: torch.use_deterministic_algorithms(True), + lambda: torch.use_deterministic_algorithms(False), + ), + # (lambda: torch.use_deterministic_algorithms(True, warn_only=True), + # lambda: torch.use_deterministic_algorithms(False)), : Unsupported + create_toggle_fns("allow_bf16_reduced_precision_reduction"), + create_toggle_fns("allow_fp16_reduced_precision_reduction"), + create_toggle_fns("allow_tf32"), + ]: + self.assertExpectedInline( + self.get_result(fn, torch.rand(10, 10), torch.ones(10, 20), ctx), + """[[['x1_2', 'y1_2', 'sum_3', 'o0'], ['x1_3', 'y1_3', 'sum_4', 'o2']], \ [['x1', 'y1', 'sum_1', 'o4'], ['x1_1', 'y1_1', 'sum_2', 'o5']]]""", - ) + ) def test_mutation_tracking_simple(self): def fn(x, y, z): diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py index 3f08da0825468..3814c52d4e511 100644 --- a/test/dynamo/test_misc.py +++ b/test/dynamo/test_misc.py @@ -8478,43 +8478,24 @@ def write_state(state): def fn(x): return x + 1 - import contextlib - - @contextlib.contextmanager - def _hip_allow_tf32(): - # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new - # and only for MI300+ - hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None) - os.environ["HIPBLASLT_ALLOW_TF32"] = "1" - - try: - yield - finally: - if hip_allow_tf32 is not None: - os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32 - else: - del os.environ["HIPBLASLT_ALLOW_TF32"] - - tf32_ctx = _hip_allow_tf32 if torch.version.hip else contextlib.nullcontext - with tf32_ctx(): - initial_state = read_state() - y = torch.randn(10) - try: - for round in range(3): - for i in range(len(initial_state)): - new_state = [False] * len(initial_state) - new_state[i] = True - write_state(new_state) - assert read_state() == new_state - last_state.clear() - fn(y) - assert last_state == new_state - if round == 0: - assert cnt == i + 1 - else: - assert cnt == len(initial_state) - finally: - write_state(initial_state) + initial_state = read_state() + y = torch.randn(10) + try: + for round in range(3): + for i in range(len(initial_state)): + new_state = [False] * len(initial_state) + new_state[i] = True + write_state(new_state) + assert read_state() == new_state + last_state.clear() + fn(y) + assert last_state == new_state + if round == 0: + assert cnt == i + 1 + else: + assert cnt == len(initial_state) + finally: + write_state(initial_state) def test_grad_state_mutated(self): prior = torch.is_grad_enabled() diff --git a/test/inductor/test_flex_decoding.py b/test/inductor/test_flex_decoding.py index 120d8d36b439d..849aefff8a965 100644 --- a/test/inductor/test_flex_decoding.py +++ b/test/inductor/test_flex_decoding.py @@ -43,9 +43,6 @@ Tolerances = namedtuple("Tolerances", ["atol", "rtol"]) -# In MI300, HIPBLASLT_ALLOW_TF32=1 is used to enable tf32 for matmul. -# In the current test, HIPBLASLT_ALLOW_TF32 is not set, according to the -# logic of allowTF32CuBLAS(), set float32_matmul_precision to highest. 
if torch.version.hip: torch.set_float32_matmul_precision("highest") else: diff --git a/test/inductor/test_padding.py b/test/inductor/test_padding.py index 9ef3a18e24234..c67bde87a369b 100644 --- a/test/inductor/test_padding.py +++ b/test/inductor/test_padding.py @@ -109,9 +109,6 @@ def setUpClass(cls): if HAS_GPU: cls.prior_float32_matmul_precision = torch.get_float32_matmul_precision() cls.prior_default_device = torch.get_default_device() - # In MI300, HIPBLASLT_ALLOW_TF32=1 is used to enable tf32 for matmul. - # In the current test, HIPBLASLT_ALLOW_TF32 is not set, according to the - # logic of allowTF32CuBLAS(), set float32_matmul_precision to highest. if torch.version.hip: torch.set_float32_matmul_precision("highest") else: diff --git a/test/test_cuda.py b/test/test_cuda.py index 6a4155825c7c6..64253f932999c 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -759,53 +759,7 @@ def check_workspace_size(inp): torch._C._cuda_clearCublasWorkspaces() - @contextlib.contextmanager - def _hip_allow_tf32(self): - # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new - # and only for MI300+ - hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None) - os.environ["HIPBLASLT_ALLOW_TF32"] = "1" - - try: - yield - finally: - if hip_allow_tf32 is not None: - os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32 - else: - del os.environ["HIPBLASLT_ALLOW_TF32"] - - @unittest.skipIf(not TEST_WITH_ROCM, "not relevant for CUDA testing") - def test_hipblaslt_allow_tf32(self): - tf32_ctx = self._hip_allow_tf32 - with tf32_ctx(): - os.environ["HIPBLASLT_ALLOW_TF32"] = "0" - # Save original value of allow_tf32 - orig = torch.backends.cuda.matmul.allow_tf32 - # If allow_tf32 variable is declared as static in aten/src/ATen/Context.cpp - # then matmul.allow_tf32 will return False after this point even if - # HIP_BLASLT_ALLOW_TF32 is set to 1 and matmul.allow_tf32 is changed. - os.environ["HIPBLASLT_ALLOW_TF32"] = "1" - # Toggle torch.backends.cuda.matmul.allow_tf32 couple of times. - torch.backends.cuda.matmul.allow_tf32 = not orig - test1 = torch.backends.cuda.matmul.allow_tf32 - torch.backends.cuda.matmul.allow_tf32 = orig - test2 = torch.backends.cuda.matmul.allow_tf32 - self.assertNotEqual(test1, test2) - # Restore original value of allow_tf32 - torch.backends.cuda.matmul.allow_tf32 = orig - def test_cublas_allow_tf32_get_set(self): - """ - We only turn on TF32 for MI300 with a special env var. 
This is because TF32 - is only available in MI300+ and is in experimental mode (hipblaslt support - is current WIP) - """ - tf32_ctx = self._hip_allow_tf32 if torch.version.hip else contextlib.nullcontext - - with tf32_ctx(): - self._test_cublas_allow_tf32_get_set_inner() - - def _test_cublas_allow_tf32_get_set_inner(self): skip_tf32_cublas = "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE" in os.environ and int( os.environ["TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"] ) @@ -820,12 +774,6 @@ def _test_cublas_allow_tf32_get_set_inner(self): torch.backends.cuda.matmul.allow_tf32 = orig def test_float32_matmul_precision_get_set(self): - tf32_ctx = self._hip_allow_tf32 if torch.version.hip else contextlib.nullcontext - - with tf32_ctx(): - self._test_float32_matmul_precision_get_set_inner() - - def _test_float32_matmul_precision_get_set_inner(self): orig = torch.get_float32_matmul_precision() skip_tf32_cublas = "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE" in os.environ and int( os.environ["TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"] diff --git a/test/test_linalg.py b/test/test_linalg.py index ffae8ac18da22..4f8780dfc30ae 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -109,22 +109,6 @@ def get_tunableop_untuned_filename(): return untuned_filename class TestLinalg(TestCase): - @contextlib.contextmanager - def _hip_allow_tf32(self): - # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new - # and only for MI300+. Environment variable will be removed in the future. - import os - hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None) - os.environ["HIPBLASLT_ALLOW_TF32"] = "1" - - try: - yield - finally: - if hip_allow_tf32 is not None: - os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32 - else: - del os.environ["HIPBLASLT_ALLOW_TF32"] - def setUp(self): super().setUp() torch.backends.cuda.matmul.allow_tf32 = False @@ -5542,13 +5526,8 @@ def test_scaled_gemm_tunableop(self, device, dtype): @runOnRocmArch(MI300_ARCH) @dtypes(torch.float) def test_tf32_tunableop(self, device, dtype): - # Test TunableOp with TF32. Supported by hipblasLT on MI300+. - # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new - # and only for MI300+. Eventually this flag will go away. - tf32_ctx = self._hip_allow_tf32 if torch.version.hip else contextlib.nullcontext - try: - with self._tunableop_ctx(), tf32_ctx(): + with self._tunableop_ctx(): torch.backends.cuda.matmul.allow_tf32 = True torch.cuda.tunable.set_rotating_buffer_size(0) @@ -5611,13 +5590,8 @@ def test_tf32_offline_tunableop(self, device, dtype): # This test is the offline version of test_tf32_tunableop import os - # Test TunableOp with TF32. Supported by hipblasLT on MI300+. - # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new - # and only for MI300+. Eventually this flag will go away. 
- tf32_ctx = self._hip_allow_tf32 if torch.version.hip else contextlib.nullcontext - try: - with self._tunableop_ctx(), tf32_ctx(): + with self._tunableop_ctx(): torch.backends.cuda.matmul.allow_tf32 = True ordinal = torch.cuda.current_device() torch.cuda.tunable.set_rotating_buffer_size(0) diff --git a/test/test_transformers.py b/test/test_transformers.py index b90b1ed86ef29..812baeb72dca6 100644 --- a/test/test_transformers.py +++ b/test/test_transformers.py @@ -51,7 +51,6 @@ PLATFORM_SUPPORTS_CUDNN_ATTENTION, tf32_on_and_off, tf32_enabled, - ROCM_VERSION, ) if TEST_FAIRSEQ: @@ -340,7 +339,7 @@ def test_train_with_pad_and_catch_error(self, device): l1_bool = nn.L1Loss()(test_train_bool[:, 0:2, :], test_eval_bool[:, 0:2, :]).item() self.assertTrue(l1_bool < 1e-4, "Eval/Train difference in pad_mask BOOL") - @tf32_on_and_off(0.001, only_if=(not TEST_WITH_ROCM or ROCM_VERSION < (7, 0))) + @tf32_on_and_off(0.001) @parametrize("attn_mask_dim", [2, 3, None]) @parametrize("key_padding_mask_dim", [2, None]) @parametrize("mask_dtype", [torch.bool, torch.float32]) @@ -524,7 +523,7 @@ def test_transformerencoder_fastpath(self, device, use_torchscript, enable_neste slowpath_output = slowpath_output.masked_fill(src_key_padding_mask.unsqueeze(-1), 0) self.assertEqual(fastpath_output_expanded, slowpath_output) - @tf32_on_and_off(0.001, only_if=(not TEST_WITH_ROCM or ROCM_VERSION < (7, 0))) + @tf32_on_and_off(0.001) @parametrize("with_no_grad", [True, False]) @parametrize("training", [True, False]) @parametrize("enable_nested_tensor", [False]) @@ -1110,7 +1109,7 @@ def forward( return_all_hiddens=False, )[0] - @tf32_on_and_off(0.003, only_if=(not TEST_WITH_ROCM or ROCM_VERSION < (7, 0))) + @tf32_on_and_off(0.003) @parametrize("input_dim,attn_mask_dim,is_causal", [(3, None, False), (3, 2, False), (3, 2, True), (3, 3, False), (3, 3, True), (4, None, False), (4, 2, False), (4, 2, True), (4, 4, False), (4, 4, True)], diff --git a/torch/cuda/tunable.py b/torch/cuda/tunable.py index c3982c33315e2..d1ac7fad7480b 100644 --- a/torch/cuda/tunable.py +++ b/torch/cuda/tunable.py @@ -591,7 +591,6 @@ def _process_single_offline_gemm(untuned_gemm_line: str, gpu_id: int) -> None: transA = layout[1] == "T" dtype = dtype_dict.get(data_type) if data_type == "tf32": - # User must still set HIPBLASLT_ALLOW_TF32=1 torch.backends.cuda.matmul.allow_tf32 = True else: torch.backends.cuda.matmul.allow_tf32 = False diff --git a/torch/testing/_internal/common_cuda.py b/torch/testing/_internal/common_cuda.py index be284429114f5..a085fbb9af643 100644 --- a/torch/testing/_internal/common_cuda.py +++ b/torch/testing/_internal/common_cuda.py @@ -170,34 +170,26 @@ def initialize_cuda_context_rng(): @contextlib.contextmanager def tf32_off(): - old_allow_tf32_matmul = torch.backends.cuda.matmul.allow_tf32 + old_fp32_precision = torch.backends.cuda.matmul.fp32_precision try: - torch.backends.cuda.matmul.allow_tf32 = False + torch.backends.cuda.matmul.fp32_precision = 'ieee' with torch.backends.cudnn.flags(enabled=None, benchmark=None, deterministic=None, allow_tf32=False): yield finally: - torch.backends.cuda.matmul.allow_tf32 = old_allow_tf32_matmul + torch.backends.cuda.matmul.fp32_precision = old_fp32_precision @contextlib.contextmanager def tf32_on(self, tf32_precision=1e-5): - if torch.version.hip: - hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None) - os.environ["HIPBLASLT_ALLOW_TF32"] = "1" - old_allow_tf32_matmul = torch.backends.cuda.matmul.allow_tf32 + old_fp32_precision = torch.backends.cuda.matmul.fp32_precision 
old_precision = self.precision try: - torch.backends.cuda.matmul.allow_tf32 = True + torch.backends.cuda.matmul.fp32_precision = 'tf32' self.precision = tf32_precision with torch.backends.cudnn.flags(enabled=None, benchmark=None, deterministic=None, allow_tf32=True): yield finally: - if torch.version.hip: - if hip_allow_tf32 is not None: - os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32 - else: - del os.environ["HIPBLASLT_ALLOW_TF32"] - torch.backends.cuda.matmul.allow_tf32 = old_allow_tf32_matmul + torch.backends.cuda.matmul.fp32_precision = old_fp32_precision self.precision = old_precision @@ -246,7 +238,7 @@ def tf32_enabled(): # if device is specified, it will check if device is cuda # if dtype is specified, it will check if dtype is float32 or complex64 # tf32 and fp32 are different only when all the three checks pass -def tf32_on_and_off(tf32_precision=1e-5, only_if=True): +def tf32_on_and_off(tf32_precision=1e-5, *, only_if=True): def with_tf32_disabled(self, function_call): with tf32_off(): function_call() From 4db203f8759634206e9431042cb5b0c86afc3a52 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Tue, 16 Sep 2025 12:52:57 +0000 Subject: [PATCH 299/693] Revert "[BE] Make PyObjectSlot use a global PyInterpreter (#162659)" This reverts commit 05ee8114f818a95745c812c3cd7aa8e784e61a9a. Reverted https://github.com/pytorch/pytorch/pull/162659 on behalf of https://github.com/jeanschmidt due to seems to have introduced errors in linting see https://github.com/pytorch/pytorch/actions/runs/17750689989/job/50444910643 ([comment](https://github.com/pytorch/pytorch/pull/162659#issuecomment-3298626136)) --- .github/workflows/pull.yml | 2 - c10/core/TensorImpl.h | 2 +- c10/core/impl/PyInterpreterHooks.h | 7 ++-- c10/core/impl/PyObjectSlot.cpp | 10 ++--- c10/core/impl/PyObjectSlot.h | 50 ++++++++++++++++++------- functorch/csrc/dim/dim.cpp | 3 +- torch/csrc/Module.cpp | 6 ++- torch/csrc/PyInterpreter.cpp | 6 ++- torch/csrc/Storage.cpp | 14 ++++--- torch/csrc/autograd/python_variable.cpp | 17 +++++---- 10 files changed, 74 insertions(+), 43 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index ff6e9ed107117..3f13fbf276882 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -127,8 +127,6 @@ jobs: uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - # More memory is needed to build with asan - runner: linux.2xlarge.memory runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build-environment: linux-jammy-py3.10-clang18-asan docker-image-name: ci-image:pytorch-linux-jammy-py3-clang18-asan diff --git a/c10/core/TensorImpl.h b/c10/core/TensorImpl.h index 98867da60a7ff..972181327b1f6 100644 --- a/c10/core/TensorImpl.h +++ b/c10/core/TensorImpl.h @@ -3269,7 +3269,7 @@ class C10_TensorImpl_Size_Check_Dummy_Class : private TensorImpl { is_le(); is_le(); are_equal(); - are_equal(); + are_equal(); are_equal(); are_equal(); are_equal(); diff --git a/c10/core/impl/PyInterpreterHooks.h b/c10/core/impl/PyInterpreterHooks.h index 4fe025d2e778b..32a17ad9a8a0c 100644 --- a/c10/core/impl/PyInterpreterHooks.h +++ b/c10/core/impl/PyInterpreterHooks.h @@ -13,10 +13,11 @@ struct C10_API PyInterpreterHooksInterface { // Get the PyInterpreter instance // Stub implementation throws error when Python is not available - // We return nullptr rather than throwing an error since there are bits of c10 - // that expect an empty PyObjectSlot when python is not available. 
virtual PyInterpreter* getPyInterpreter() const { - return nullptr; + TORCH_CHECK( + false, + "PyTorch was compiled without Python support. " + "Cannot access Python interpreter from C++."); } }; diff --git a/c10/core/impl/PyObjectSlot.cpp b/c10/core/impl/PyObjectSlot.cpp index 7476ac1d4c39b..0f1bfb2110747 100644 --- a/c10/core/impl/PyObjectSlot.cpp +++ b/c10/core/impl/PyObjectSlot.cpp @@ -2,7 +2,7 @@ namespace c10::impl { -PyObjectSlot::PyObjectSlot() : pyobj_(nullptr) {} +PyObjectSlot::PyObjectSlot() : pyobj_interpreter_(nullptr), pyobj_(nullptr) {} PyObjectSlot::~PyObjectSlot() { maybe_destroy_pyobj(); @@ -10,9 +10,9 @@ PyObjectSlot::~PyObjectSlot() { void PyObjectSlot::maybe_destroy_pyobj() { if (owns_pyobj()) { - TORCH_INTERNAL_ASSERT(getGlobalPyInterpreter() != nullptr); + TORCH_INTERNAL_ASSERT(pyobj_interpreter_ != nullptr); TORCH_INTERNAL_ASSERT(pyobj_ != nullptr); - (*getGlobalPyInterpreter()) + (*pyobj_interpreter_.load(std::memory_order_acquire)) ->decref(_unchecked_untagged_pyobj(), /*has_pyobj_slot*/ true); // NB: this destructor can only be entered when there are no // references to this C++ object (obviously), NOR any references @@ -25,7 +25,7 @@ void PyObjectSlot::maybe_destroy_pyobj() { } PyInterpreter* PyObjectSlot::pyobj_interpreter() { - return getGlobalPyInterpreter(); + return pyobj_interpreter_.load(std::memory_order_acquire); } PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const { @@ -35,7 +35,7 @@ PyObject* PyObjectSlot::_unchecked_untagged_pyobj() const { } PyInterpreter& PyObjectSlot::load_pyobj_interpreter() const { - auto interpreter = getGlobalPyInterpreter(); + auto interpreter = pyobj_interpreter_.load(std::memory_order_acquire); if (interpreter) { return *interpreter; } diff --git a/c10/core/impl/PyObjectSlot.h b/c10/core/impl/PyObjectSlot.h index e7d78f8360c33..58b2490eba001 100644 --- a/c10/core/impl/PyObjectSlot.h +++ b/c10/core/impl/PyObjectSlot.h @@ -6,16 +6,9 @@ #include #include -namespace c10::impl { - -// Function pointer type for getting the global interpreter -using GetPyInterpreterFn = PyInterpreter* (*)(); +#include -// Global function pointer (set by csrc initialization) -C10_API extern GetPyInterpreterFn g_get_pyinterpreter_fn; - -// Helper function to get the global interpreter -C10_API PyInterpreter* getGlobalPyInterpreter(); +namespace c10::impl { struct C10_API PyObjectSlot { public: @@ -33,6 +26,8 @@ struct C10_API PyObjectSlot { // NB: THIS FUNCTION CAN RAISE AN EXCEPTION. Make sure to clean up after // PyObject if necessary! 
void init_pyobj(PyObject* pyobj) { + pyobj_interpreter_.store( + getGlobalPyInterpreter(), std::memory_order_relaxed); pyobj_ = pyobj; } @@ -60,15 +55,18 @@ struct C10_API PyObjectSlot { // @todo alban: I'm not too sure what's going on here, we can probably delete // it but it's worthwhile making sure - std::optional check_pyobj() const { - impl::PyInterpreter* interpreter = getGlobalPyInterpreter(); - if (interpreter == nullptr || pyobj_ == nullptr) { + std::optional check_pyobj(bool ignore_hermetic_tls = false) const { + impl::PyInterpreter* interpreter = + pyobj_interpreter_.load(std::memory_order_acquire); + if (interpreter == nullptr) { return std::nullopt; } - if (c10::impl::HermeticPyObjectTLS::get_state()) { + + if (!ignore_hermetic_tls && c10::impl::HermeticPyObjectTLS::get_state()) { return std::nullopt; + } else { + return _unchecked_untagged_pyobj(); } - return _unchecked_untagged_pyobj(); } PyInterpreter& load_pyobj_interpreter() const; @@ -78,6 +76,30 @@ struct C10_API PyObjectSlot { void set_owns_pyobj(bool b); private: + // This field contains the interpreter tag for this object. See + // Note [Python interpreter tag] for general context + // + // Note [Memory ordering on Python interpreter tag] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // What memory_order do we need when accessing this atomic? We don't + // need a single total modification order (as provided by + // memory_order_seq_cst) as pyobj_interpreter_ is monotonic: it can only + // transition from -1 to some positive integer and never changes afterwards. + // Because there is only one modification, it trivially already has a total + // modification order (e.g., we don't need fences or locked instructions on + // x86) + // + // In fact, one could make a reasonable argument that relaxed reads are OK, + // due to the presence of external locking (GIL) to ensure that interactions + // with other data structures are still correctly synchronized, so that + // we fall in the "Single-Location Data Structures" case as described in + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf + // However, on x86, it doesn't matter if I use acquire or relaxed on the load + // as I get the same assembly in both cases. So I just use the more + // conservative acquire (which will impede compiler optimizations but I don't + // care) + std::atomic pyobj_interpreter_; + // This field contains a reference to a PyObject representing this Tensor. // If pyobj is nullptr, when we transfer Tensor to Python, we allocate a new // PyObject for it and set this field. 
This field does not have to be diff --git a/functorch/csrc/dim/dim.cpp b/functorch/csrc/dim/dim.cpp index 5258ba52f99c5..8f1e561e2051b 100644 --- a/functorch/csrc/dim/dim.cpp +++ b/functorch/csrc/dim/dim.cpp @@ -1187,7 +1187,8 @@ int64_t _Tensor_ndim(mpy::handle h) { mpy::handle handle_from_tensor(Arena& A, TensorRef t) { // fast case: tensor is live in python std::optional mb_obj = - t->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); + t->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false); if (mb_obj.has_value() && !t->unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()) { return *mb_obj; diff --git a/torch/csrc/Module.cpp b/torch/csrc/Module.cpp index 3a3e8bfef0478..ac2b03d2651cc 100644 --- a/torch/csrc/Module.cpp +++ b/torch/csrc/Module.cpp @@ -403,9 +403,11 @@ static PyObject* THPModule_swap_tensor_impl(PyObject* _unused, PyObject* args) { // The TensorImpls contain PyObjectSlots that have a reference to the PyObject // associated with the TensorImpl. Swap this field as well. std::optional mb_obj_a = - a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); + a->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false); std::optional mb_obj_b = - b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); + b->cdata->unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false); TORCH_INTERNAL_ASSERT( mb_obj_a.has_value() && mb_obj_b.has_value(), "Both tensors should have PyObjects tagged by the current python interpreter"); diff --git a/torch/csrc/PyInterpreter.cpp b/torch/csrc/PyInterpreter.cpp index 993f8b8216a6b..e6016a7721e8b 100644 --- a/torch/csrc/PyInterpreter.cpp +++ b/torch/csrc/PyInterpreter.cpp @@ -614,7 +614,8 @@ static void set_tensor_attr_with_capsule( const c10::TensorImpl* tensor, py::capsule& capsule, const char* attr_name) { - std::optional mb_obj = tensor->pyobj_slot()->check_pyobj(); + std::optional mb_obj = tensor->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false); TORCH_CHECK( mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value"); auto obj = mb_obj.value(); @@ -641,7 +642,8 @@ static c10::ArrayRef get_set_cached_attr( const c10::TensorImpl* tensor, const char* base_attr_name, const py::object& obj) { - std::optional mb_obj = tensor->pyobj_slot()->check_pyobj(); + std::optional mb_obj = + tensor->pyobj_slot()->check_pyobj(getPyInterpreter()); TORCH_CHECK( mb_obj.has_value(), "Tensor subclass's PyInterpreter has no value"); auto tensor_obj = mb_obj.value(); diff --git a/torch/csrc/Storage.cpp b/torch/csrc/Storage.cpp index f6638bbd10c19..08112b41aaaed 100644 --- a/torch/csrc/Storage.cpp +++ b/torch/csrc/Storage.cpp @@ -41,8 +41,8 @@ PyObject* THPStorage_NewWithStorage( "Creating a Storage subclass from a class that does not inherit from ", "Storage is not possible. 
Make sure your class inherits from Storage."); - auto maybe_pyobj = - _storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj(); + auto maybe_pyobj = _storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false); if (maybe_pyobj.has_value() && maybe_pyobj.value()) { TORCH_CHECK( allow_preexisting_pyobj, @@ -93,7 +93,8 @@ PyObject* THPStorage_Wrap(c10::Storage storage) { } c10::impl::PyObjectSlot* pyobj_slot = storage_impl->pyobj_slot(); - std::optional maybe_pyobj = pyobj_slot->check_pyobj(); + std::optional maybe_pyobj = pyobj_slot->check_pyobj( + /*ignore_hermetic_tls=*/false); if (maybe_pyobj.has_value()) { auto obj = *maybe_pyobj; if (obj) { @@ -126,8 +127,8 @@ static bool THPStorage_isPreservable(THPStorage* self) { return false; } - if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj() != - (PyObject*)self) { + if (storage.unsafeGetStorageImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/true) != (PyObject*)self) { return false; } if (storage.use_count() <= 1) { @@ -144,7 +145,8 @@ static bool THPStorage_tryPreserve(THPStorage* self) { const auto& storage = THPStorage_Unpack(self); c10::StorageImpl* storage_impl = storage.unsafeGetStorageImpl(); - auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj(); + auto maybe_pyobj = storage_impl->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/true); // NOTE: It is possible to just set the PyObjectSlot here, but the point is // that we should have already set PyObjectSlot when the storage PyObject // was created. diff --git a/torch/csrc/autograd/python_variable.cpp b/torch/csrc/autograd/python_variable.cpp index bbda3adc2b275..7ec4bf28e1604 100644 --- a/torch/csrc/autograd/python_variable.cpp +++ b/torch/csrc/autograd/python_variable.cpp @@ -265,7 +265,8 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) { } std::optional mb_obj = - var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); + var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false); if (mb_obj.has_value()) { auto obj = *mb_obj; if (obj) { @@ -328,8 +329,8 @@ static bool isResurrectable(THPVariable* self) { return false; } // Check if this is hermetic. If it is, no resurrection. - if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj() != - (PyObject*)self) { + if (tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false) != (PyObject*)self) { return false; } return true; @@ -354,7 +355,8 @@ static bool THPVariable_tryResurrect(THPVariable* self) { !tensor.unsafeGetTensorImpl()->pyobj_slot()->owns_pyobj()); c10::TensorImpl* tensor_impl = tensor.unsafeGetTensorImpl(); - auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj(); + auto maybe_pyobj = tensor_impl->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false); TORCH_INTERNAL_ASSERT( maybe_pyobj.has_value(), @@ -1932,8 +1934,8 @@ static int THPVariable_subclass_clear(THPVariable* self) { // because Tensor asked us to (it's already destructing). 
if (!self->cdata.unsafeIsBorrowed() && - tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj() == - (PyObject*)self) { + tensor.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false) == (PyObject*)self) { // TODO: empirically, on OS X this assert appears to be untrue // In test_py_tensors_multi_async_call - ProcessGroupRpcTestWithSpawn // distributed/rpc/test_process_group_agent.py @@ -2119,7 +2121,8 @@ static PyObject* THPVariable_NewWithVar( // This function overwrite the Tensor's pyobj field without extra checks // Make sure it is not set otherwise we would leak memory - auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj(); + auto mb_obj = _var.unsafeGetTensorImpl()->pyobj_slot()->check_pyobj( + /*ignore_hermetic_tls=*/false); // Under some circumstances, we may attempt to create a new Python // object for a variable that already has a Python object. The most common From e7c3f802ffa7db2bce3ba57e41ac1f7499a4b81a Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Tue, 16 Sep 2025 15:14:36 +0000 Subject: [PATCH 300/693] Revert "[dynamo][hop] Introduce Local Map HOP (#161458)" This reverts commit 505458db803e1ffabac08a2fc150b566d3ea3a57. Reverted https://github.com/pytorch/pytorch/pull/161458 on behalf of https://github.com/jeffdaily due to broke rocm tests ([comment](https://github.com/pytorch/pytorch/pull/161458#issuecomment-3299230458)) --- test/dynamo/test_higher_order_ops.py | 1 - test/higher_order_ops/test_local_map.py | 203 ------------ test/inductor/test_compiled_autograd.py | 2 +- torch/_dynamo/variables/builder.py | 7 +- torch/_dynamo/variables/higher_order_ops.py | 111 ------- torch/_higher_order_ops/__init__.py | 2 - torch/_higher_order_ops/local_map.py | 327 -------------------- torch/distributed/tensor/_ops/_view_ops.py | 4 +- torch/testing/_internal/hop_db.py | 38 --- 9 files changed, 3 insertions(+), 692 deletions(-) delete mode 100644 test/higher_order_ops/test_local_map.py delete mode 100644 torch/_higher_order_ops/local_map.py diff --git a/test/dynamo/test_higher_order_ops.py b/test/dynamo/test_higher_order_ops.py index 78943b41bc262..9f093d4dc0cea 100644 --- a/test/dynamo/test_higher_order_ops.py +++ b/test/dynamo/test_higher_order_ops.py @@ -7197,7 +7197,6 @@ def false_branch(x): # aot_eager "map", # assert type(args[1].realize()) is TensorVariable "scan", # scan is not an OpOverload - "local_map_hop", # can't retrace # inductor "while_loop", # LoweringException: AssertionError "flex_attention", # LoweringException: AssertionError diff --git a/test/higher_order_ops/test_local_map.py b/test/higher_order_ops/test_local_map.py deleted file mode 100644 index 46ecacc2b330c..0000000000000 --- a/test/higher_order_ops/test_local_map.py +++ /dev/null @@ -1,203 +0,0 @@ -# Owner(s): ["module: higher order operators"] -# flake8: noqa: B950 - - -import unittest - -import torch -import torch._dynamo -import torch._functorch -import torch._inductor -import torch._inductor.decomposition -import torch.nn.functional as F -from torch import nn -from torch._dynamo.variables.higher_order_ops import LocalMapWrappedHigherOrderVariable - - -if torch.distributed.is_available(): - from torch.distributed._tensor.experimental import local_map - from torch.distributed.tensor.placement_types import Replicate, Shard - -from torch.testing._internal.common_utils import run_tests, TEST_WITH_CROSSREF, TestCase -from torch.testing._internal.triton_utils import requires_cuda_and_triton - - -nested_compile_region = torch.compiler.nested_compile_region - - 
-class MyTransform(torch.autograd.Function): - @staticmethod - def forward(ctx, x): - return x + 100 - - @staticmethod - def backward(ctx, grad): - return grad + 100 - - -def context_parallel_attention(query, key, value): - out = F.scaled_dot_product_attention( - query=query, key=key, value=value, is_causal=False - ) - return out - - -def create_model(attention_fn, nheads, dim1, dim2): - class LocalMapTransformerBlock(nn.Module): - def __init__(self, nheads, dim1, dim2): - super().__init__() - self.nheads = nheads - bias = False - self.wq = nn.Linear(dim1, dim1, bias=bias) - self.wk = nn.Linear(dim1, dim1, bias=bias) - self.wv = nn.Linear(dim1, dim1, bias=bias) - self.wo = nn.Linear(dim1, dim1, bias=bias) - self.w1 = nn.Linear(dim1, dim2, bias=bias) - self.w2 = nn.Linear(dim2, dim1, bias=bias) - - def forward(self, x): - q = self.wq(x) - k = self.wk(x) - v = self.wv(x) - - q = q.unflatten(-1, (self.nheads, -1)).permute(0, 2, 1, 3) - k = k.unflatten(-1, (self.nheads, -1)).permute(0, 2, 1, 3) - v = v.unflatten(-1, (self.nheads, -1)).permute(0, 2, 1, 3) - - o = attention_fn(q, k, v) - o = o.permute(0, 2, 1, 3).flatten(-2) - - o = self.wo(o) - - o0 = o + x - - o = self.w1(o0) - o = torch.nn.functional.relu(o) - o = self.w2(o) - - o = o0 + o - return o - - return LocalMapTransformerBlock(nheads, dim1, dim2) - - -class TestLocalMap(TestCase): - @requires_cuda_and_triton - @unittest.skipIf( - not torch.distributed.is_available(), "Torch distributed not available." - ) - def test_simple(self): - @local_map( - out_placements=((Shard(0), Shard(1), Shard(2)),), - in_placements=( - (Shard(0), Shard(1), Shard(2)), # query - (Shard(0), Shard(1), Replicate()), # key - (Shard(0), Shard(1), Replicate()), # value - ), - redistribute_inputs=True, - in_grad_placements=None, - device_mesh=None, - ) - def cp_decorated(query, key, value): - return context_parallel_attention(query, key, value) - - cp_function = local_map( - context_parallel_attention, - out_placements=(Shard(0), Shard(1), Shard(2)), - in_placements=( - (Shard(0), Shard(1), Shard(2)), # query - (Shard(0), Shard(1), Replicate()), # key - (Shard(0), Shard(1), Replicate()), # value - ), - redistribute_inputs=True, - in_grad_placements=None, - device_mesh=None, - ) - bs = 8 * 1 - dim1 = 96 - dim2 = dim1 * 4 - nheads = 16 - seq_len = 16 - - from torch._dynamo.testing import EagerAndRecordGraphs, normalize_gm - - backend = EagerAndRecordGraphs() - - model = create_model(cp_decorated, nheads, dim1, dim2).cuda() - inputs = (torch.randn(bs, seq_len, dim1, requires_grad=True).cuda(),) - with LocalMapWrappedHigherOrderVariable.enable(): - out = torch.compile(model, backend=backend)(*inputs) - out.sum().backward() - - model = create_model(cp_function, nheads, dim1, dim2).cuda() - inputs = (torch.randn(bs, seq_len, dim1, requires_grad=True).cuda(),) - with LocalMapWrappedHigherOrderVariable.enable(): - out = torch.compile(model, backend=backend)(*inputs) - out.sum().backward() - - if not TEST_WITH_CROSSREF: - self.assertEqual(len(backend.graphs), 2) - # should see local_map_hop in both - self.assertExpectedInline( - normalize_gm(backend.graphs[0].print_readable(print_output=False)), - """\ -class GraphModule(torch.nn.Module): - def forward(self, L_self_modules_wq_parameters_weight_: "f32[96, 96]", L_x_: "f32[8, 16, 96]", L_self_modules_wk_parameters_weight_: "f32[96, 96]", L_self_modules_wv_parameters_weight_: "f32[96, 96]", L_self_modules_wo_parameters_weight_: "f32[96, 96]", L_self_modules_w1_parameters_weight_: "f32[384, 96]", 
L_self_modules_w2_parameters_weight_: "f32[96, 384]"): - l_self_modules_wq_parameters_weight_ = L_self_modules_wq_parameters_weight_ - l_x_ = L_x_ - l_self_modules_wk_parameters_weight_ = L_self_modules_wk_parameters_weight_ - l_self_modules_wv_parameters_weight_ = L_self_modules_wv_parameters_weight_ - l_self_modules_wo_parameters_weight_ = L_self_modules_wo_parameters_weight_ - l_self_modules_w1_parameters_weight_ = L_self_modules_w1_parameters_weight_ - l_self_modules_w2_parameters_weight_ = L_self_modules_w2_parameters_weight_ - - q: "f32[8, 16, 96]" = torch._C._nn.linear(l_x_, l_self_modules_wq_parameters_weight_, None); l_self_modules_wq_parameters_weight_ = None - - k: "f32[8, 16, 96]" = torch._C._nn.linear(l_x_, l_self_modules_wk_parameters_weight_, None); l_self_modules_wk_parameters_weight_ = None - - v: "f32[8, 16, 96]" = torch._C._nn.linear(l_x_, l_self_modules_wv_parameters_weight_, None); l_self_modules_wv_parameters_weight_ = None - - unflatten: "f32[8, 16, 16, 6]" = q.unflatten(-1, (16, -1)); q = None - q_1: "f32[8, 16, 16, 6]" = unflatten.permute(0, 2, 1, 3); unflatten = None - - unflatten_1: "f32[8, 16, 16, 6]" = k.unflatten(-1, (16, -1)); k = None - k_1: "f32[8, 16, 16, 6]" = unflatten_1.permute(0, 2, 1, 3); unflatten_1 = None - - unflatten_2: "f32[8, 16, 16, 6]" = v.unflatten(-1, (16, -1)); v = None - v_1: "f32[8, 16, 16, 6]" = unflatten_2.permute(0, 2, 1, 3); unflatten_2 = None - - subgraph_0 = self.subgraph_0 - local_map_hop = torch.ops.higher_order.local_map_hop(subgraph_0, q_1, k_1, v_1); subgraph_0 = q_1 = k_1 = v_1 = None - o: "f32[8, 16, 16, 6]" = local_map_hop[0]; local_map_hop = None - - permute_3: "f32[8, 16, 16, 6]" = o.permute(0, 2, 1, 3); o = None - o_1: "f32[8, 16, 96]" = permute_3.flatten(-2); permute_3 = None - - o_2: "f32[8, 16, 96]" = torch._C._nn.linear(o_1, l_self_modules_wo_parameters_weight_, None); o_1 = l_self_modules_wo_parameters_weight_ = None - - o0: "f32[8, 16, 96]" = o_2 + l_x_; o_2 = l_x_ = None - - o_3: "f32[8, 16, 384]" = torch._C._nn.linear(o0, l_self_modules_w1_parameters_weight_, None); l_self_modules_w1_parameters_weight_ = None - - o_4: "f32[8, 16, 384]" = torch.nn.functional.relu(o_3); o_3 = None - - o_5: "f32[8, 16, 96]" = torch._C._nn.linear(o_4, l_self_modules_w2_parameters_weight_, None); o_4 = l_self_modules_w2_parameters_weight_ = None - - o_6: "f32[8, 16, 96]" = o0 + o_5; o0 = o_5 = None - return (o_6,) - - class subgraph_0(torch.nn.Module): - def forward(self, q_1: "f32[8, 16, 16, 6]", k_1: "f32[8, 16, 16, 6]", v_1: "f32[8, 16, 16, 6]"): - out: "f32[8, 16, 16, 6]" = torch._C._nn.scaled_dot_product_attention(query = q_1, key = k_1, value = v_1, is_causal = False); q_1 = k_1 = v_1 = None - return (out,) -""", - ) - - self.assertEqual( - normalize_gm(backend.graphs[0].print_readable(print_output=False)), - normalize_gm(backend.graphs[1].print_readable(print_output=False)), - ) - - -if __name__ == "__main__": - run_tests() diff --git a/test/inductor/test_compiled_autograd.py b/test/inductor/test_compiled_autograd.py index e0cd8b99a6b3d..6014a6e698607 100644 --- a/test/inductor/test_compiled_autograd.py +++ b/test/inductor/test_compiled_autograd.py @@ -5354,7 +5354,7 @@ def wrap_test_class(orig_cls): test_dtensor.TestDTensorCompile ) -xfail_hops = {"local_map_hop"} +xfail_hops = {} class TestCompiledAutogradOpInfo(TestCase): diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py index 660042b33b875..20b88759ef324 100644 --- a/torch/_dynamo/variables/builder.py +++ 
b/torch/_dynamo/variables/builder.py @@ -206,10 +206,7 @@ UserMethodVariable, WrapperUserFunctionVariable, ) -from .higher_order_ops import ( - LocalMapWrappedHigherOrderVariable, - TorchHigherOrderOperatorVariable, -) +from .higher_order_ops import TorchHigherOrderOperatorVariable from .iter import ItertoolsVariable from .lazy import LazyVariableTracker from .lists import ( @@ -853,8 +850,6 @@ def build_key_value(i, k, v): return build_checkpoint_variable(source=self.source) elif is_invoke_subgraph(value): return build_invoke_subgraph_variable(source=self.source) - elif LocalMapWrappedHigherOrderVariable.should_wrap_in_hop(value): - return LocalMapWrappedHigherOrderVariable.build(source=self.source) elif isinstance(value, functools.partial): func_src = AttrSource(self.get_source(), "func") func_obj = VariableBuilder(self.tx, func_src)(value.func) diff --git a/torch/_dynamo/variables/higher_order_ops.py b/torch/_dynamo/variables/higher_order_ops.py index 7c20578b51bf7..5ac883c7d3932 100644 --- a/torch/_dynamo/variables/higher_order_ops.py +++ b/torch/_dynamo/variables/higher_order_ops.py @@ -3383,7 +3383,6 @@ def _call_function( lambda a: a.node.meta["example_value"], body_r.as_proxy(), ) - p_kwargs = {key: value.as_proxy() for key, value in kwargs.items()} return _call_function_and_unflatten_output( tx, self.value, p_args, p_kwargs, flat_example_value, treespec @@ -3498,115 +3497,6 @@ def _call_function( ) -class LocalMapWrappedHigherOrderVariable(WrapHigherOrderVariable): - supports_input_mutation = False - supports_aliasing = False - - # Subclasses aren't supported by speculate_subgraph yet - # So this HOP is only usable with plain tensors - _enabled = False - - @classmethod - @contextlib.contextmanager - def enable(cls): - """Context manager to temporarily enable local map wrapping. - Will be removed when speculate_subgraph supports subclass inputs: - https://github.com/pytorch/pytorch/issues/161456. - - Usage: - with LocalMapWrappedHigherOrderVariable.enable_wrapping(): - # Code where should_wrap_in_hop will return True - pass - """ - old_value = cls._enabled - cls._enabled = True - try: - yield - finally: - cls._enabled = old_value - - @classmethod - def should_wrap_in_hop(cls, value): - if not torch.distributed.is_available(): - return False - - from torch.distributed.tensor.experimental._func_map import _local_map_wrapped - - # check is important to avoid subclass dispatch - if type(value) != type(_local_map_wrapped): - return False - - return value == _local_map_wrapped and cls._enabled - - @staticmethod - def build(**options): - return TorchHigherOrderOperatorVariable.make( - torch._higher_order_ops.local_map_hop, - **options, - ) - - def python_type(self): - return type(self.value) - - def _call_function( - self, - tx: "InstructionTranslator", - args: "list[VariableTracker]", - kwargs: "dict[str, VariableTracker]", - ) -> "VariableTracker": - """ - Goal of this function is to rewrite local_map usage as a HOP: - local_map(func, ...) -> local_map_hop(gm, ...) 
- """ - - ( - user_func, - out_placements, - in_placements, - in_grad_placements, - device_mesh, - redistribute_inputs, - *user_args, - ) = args - - ( - p_args, - p_kwargs, - example_value, - body_r, - treespec, - body_gmod, - body_name, - ) = self.create_wrapped_node( - tx, user_func, user_args, kwargs, self.value._name, subgraph_name="subgraph" - ) - - # Treat as const, so we don't have to deal with Placement types in fx IR - # Guarded with EQUALS_MATCH on local_map call's arguments - body_gmod.meta["local_map_kwargs"] = { - "out_placements": out_placements.value, - "in_placements": in_placements.value, - "redistribute_inputs": redistribute_inputs.value, - "in_grad_placements": in_grad_placements.value, - "device_mesh": device_mesh.value, - } - - assert len(p_kwargs) == 0 - - flat_example_value = pytree.tree_map_only( - torch.fx.Proxy, - lambda a: a.node.meta["example_value"], - body_r.as_proxy(), - ) - - p_kwargs = {key: value.as_proxy() for key, value in kwargs.items()} - out = _call_function_and_unflatten_output( - tx, self.value, p_args, p_kwargs, flat_example_value, treespec - ) - - return out - - # Map operator names to their corresponding variable for fast TorchHigherOrderOperatorVariable.make() _hop_name_to_variable_class = { "cond": CondHigherOrderVariable, @@ -3635,5 +3525,4 @@ def _call_function( "auto_functionalized_v2": AutoFunctionalizeHigherOrderVariable, "invoke_subgraph": InvokeSubgraphHigherOrderVariable, "custom_function_call": CustomFunctionHigherOrderOperatorVariable, - "local_map_hop": LocalMapWrappedHigherOrderVariable, } diff --git a/torch/_higher_order_ops/__init__.py b/torch/_higher_order_ops/__init__.py index 516d58bdf314e..e809c729dc424 100644 --- a/torch/_higher_order_ops/__init__.py +++ b/torch/_higher_order_ops/__init__.py @@ -21,7 +21,6 @@ from torch._higher_order_ops.foreach_map import _foreach_map, foreach_map from torch._higher_order_ops.hints_wrap import hints_wrapper from torch._higher_order_ops.invoke_subgraph import invoke_subgraph -from torch._higher_order_ops.local_map import local_map_hop from torch._higher_order_ops.map import map from torch._higher_order_ops.out_dtype import out_dtype from torch._higher_order_ops.run_const_graph import run_const_graph @@ -74,5 +73,4 @@ "aoti_call_delegate", "map", "while_loop_stack_output", - "local_map_hop", ] diff --git a/torch/_higher_order_ops/local_map.py b/torch/_higher_order_ops/local_map.py deleted file mode 100644 index 22cb2af50f1f7..0000000000000 --- a/torch/_higher_order_ops/local_map.py +++ /dev/null @@ -1,327 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. -# -# This source code is licensed under the BSD license found in the -# LICENSE file in the root directory of this source tree. 
- -# NOTE: this file may be removed once we move to a dynamo frontend - -import functools -from collections.abc import Generator -from contextlib import contextmanager -from typing import Any, Callable, Optional - -import torch -import torch.utils._pytree as pytree -from torch._C import DispatchKey -from torch._higher_order_ops.utils import ( - clone_outputs_aliasing_inputs, - save_tensors_and_symints_for_backward, - saved_tensors_and_symints, -) -from torch._ops import HigherOrderOperator -from torch._subclasses.fake_tensor import FakeTensorMode -from torch.fx import GraphModule -from torch.fx.experimental.proxy_tensor import ProxyTorchDispatchMode, track_tensor_tree - - -# Proxy the HOP instead of inlining into it -_DEFER_INLINING = False - - -@contextmanager -def defer_inlining() -> Generator[None, None, None]: - global _DEFER_INLINING - prior = _DEFER_INLINING - try: - _DEFER_INLINING = True - yield - finally: - _DEFER_INLINING = prior - - -class LocalMapHOP(HigherOrderOperator): - def __init__(self) -> None: - super().__init__("local_map_hop") - - def __call__(self, fw_gm: GraphModule, *args: Any, **kwargs: Any) -> Any: - return super().__call__(fw_gm, *args, **kwargs) - - -local_map_hop = LocalMapHOP() - - -def create_hop_fw_bw( - fw_gm: GraphModule, - *_args: Any, -) -> tuple[GraphModule, GraphModule, int, int, set[int]]: - """ - Traces a joint, applies passes and partitions it - """ - # Keeping these imports here - # Avoid circular dependencies once we upstream with dynamo frontend - from torch._dispatch.python import suspend_functionalization - from torch._functorch.aot_autograd import AOTConfig, create_joint - from torch._guards import detect_fake_mode - from torch._subclasses.fake_tensor import FakeTensor, FakeTensorMode - from torch._subclasses.functional_tensor import disable_functional_mode - from torch.fx.experimental.proxy_tensor import disable_proxy_modes_tracing, make_fx - - dummy_aot_config = AOTConfig( - fw_compiler=None, # type: ignore[arg-type] - bw_compiler=None, # type: ignore[arg-type] - partition_fn=None, # type: ignore[arg-type] - decompositions={}, - num_params_buffers=0, - aot_id=0, - keep_inference_input_mutations=False, - ) - - with suspend_functionalization(), disable_functional_mode(): - with disable_proxy_modes_tracing(): - # create a tensor (fake) from a compiler wrapped FunctionalTensor - def _from_fun(t: Any) -> Any: - if isinstance(t, torch.Tensor): - return torch.empty_strided( - t.size(), - t.stride(), - device=t.device, - dtype=t.dtype, - requires_grad=t.requires_grad, - ) - return t - - # If someone runs this hop under the default compiler backend ("eager") - # Then this path will be run with the actual user inputs. We convert them - # to fake tensors in order to not perform any actual compute. 
- - fake_mode = detect_fake_mode(_args) - if fake_mode is None: - fake_mode = FakeTensorMode(allow_non_fake_inputs=True) - - with fake_mode: - fw_inputs = pytree.tree_map(_from_fun, _args) - - assert all( - isinstance(t, (FakeTensor, int, torch.SymInt)) for t in fw_inputs - ), f"Unexpected element in {fw_inputs=}" - - example_grads = pytree.tree_map( - _from_fun, - fw_gm(*fw_inputs), - ) - if not isinstance(example_grads, (list, tuple)): - example_grads = [example_grads] - - num_fw_inputs = len(fw_inputs) - num_fw_outputs = len(example_grads) - - def joint_f( - *primals_and_tangents: list[torch.Tensor], - ) -> Any: - primals = primals_and_tangents[:num_fw_inputs] - tangents = primals_and_tangents[num_fw_inputs:] - - def prepare_fw_with_masks(fn: Callable[..., Any]) -> Callable[..., Any]: - def fw_with_masks(*args: Any) -> tuple[tuple[Any], list[bool]]: - fw_out = fn(*args) - assert isinstance(fw_out, tuple), ( - "Dynamo traced submodule should return tuple" - ) - return fw_out, [ - True - if isinstance(ret, torch.Tensor) and ret.requires_grad - else False - for ret in fw_out - ] - - return fw_with_masks - - fw_outs, grads = create_joint( - prepare_fw_with_masks(fw_gm), aot_config=dummy_aot_config - )(primals, tangents) - - maybe_clone = clone_outputs_aliasing_inputs(primals_and_tangents) - # put grads first to work with existing hop utils - return pytree.tree_map(maybe_clone, (*grads, *fw_outs)) - - filtered_grads_idx = set() - for i, example_grad in enumerate(example_grads): - # Filter out grads that are None or do not require_grad. - # The AOTAutograd utils we rely on force this assumption. - # We must also filter the runtime tangents too. - if example_grad is not None and ( - isinstance(example_grad, torch.Tensor) and example_grad.requires_grad - ): - filtered_grads_idx.add(i) - - primals_and_tangents = [ - *fw_inputs, - *[example_grads[i] for i in filtered_grads_idx], - ] - joint_hop_gm = make_fx(joint_f)(*primals_and_tangents) - - from torch._functorch._aot_autograd.graph_compile import prepare_for_partitioner - from torch._inductor.compile_fx import partition_fn - - # Match partitioner convention - prepped_joint_hop_gm = prepare_for_partitioner( - joint_hop_gm, num_fw_inputs, num_fw_outputs - ) - # Also runs joint passes - new_fw_gm, new_bw_gm = partition_fn( - prepped_joint_hop_gm, - [], - num_fwd_outputs=num_fw_outputs, - static_lifetime_input_indices=[], - ) - - # Propagate meta onto fw/bw graphs, later will be set on proxied nodes - local_map_kwargs = fw_gm.meta["local_map_kwargs"] # type: ignore[attr-defined] - - new_fw_gm.meta["local_map_kwargs"] = local_map_kwargs - new_bw_gm.meta["local_map_kwargs"] = {**local_map_kwargs} - # Okay because Autoparallel assumes same sharding between param and grads - new_bw_gm.meta["local_map_kwargs"]["in_placements"] = local_map_kwargs[ - "out_placements" - ] - new_bw_gm.meta["local_map_kwargs"]["out_placements"] = local_map_kwargs[ - "in_placements" - ] - - return new_fw_gm, new_bw_gm, num_fw_inputs, num_fw_outputs, filtered_grads_idx - - -class LocalMapAutogradOp(torch.autograd.Function): - @staticmethod - def forward( - ctx: Any, - fw_gm: GraphModule, - bw_gm: GraphModule, - num_fw_ins: int, - num_fw_outs: int, - filtered_grads_idx: set[int], - *args: Any, - **kwargs: Any, - ) -> tuple[Optional[torch.Tensor], ...]: - ctx.bw_gm = bw_gm - ctx.num_fw_ins = num_fw_ins - ctx.filtered_grads_idx = filtered_grads_idx - - with torch._C._AutoDispatchBelowAutograd(): - fw_outs_with_saved_activations = local_map_hop(fw_gm, *args, **kwargs) - - fw_outs 
= fw_outs_with_saved_activations[:num_fw_outs] - saved_activations = fw_outs_with_saved_activations[num_fw_outs:] - save_tensors_and_symints_for_backward(ctx, saved_activations) - - return fw_outs - - @staticmethod - def backward( - ctx: Any, *_grads: tuple[torch.Tensor] - ) -> tuple[Optional[torch.Tensor], ...]: - saved_activations = saved_tensors_and_symints(ctx) - with torch._C._AutoDispatchBelowAutograd(): - # Filter out grads that are None or do not require_grad. - # The AOTAutograd utils we rely on force this assumption. - grads = [_grads[i] for i in ctx.filtered_grads_idx] - grad_ins = local_map_hop(ctx.bw_gm, *saved_activations, *grads) - if len(grad_ins) != ctx.num_fw_ins: - raise RuntimeError( - f"Expected {ctx.num_fw_ins} grad_ins, got {len(grad_ins)}" - ) - return None, None, None, None, None, *grad_ins - - -@local_map_hop.py_impl(torch._C.DispatchKey.Autograd) -def autograd_key( - fw_gm: GraphModule, - *args: Any, - **kwargs: Any, -) -> Any: - if _DEFER_INLINING: - fw_gm, bw_gm, num_fw_ins, num_fw_outs, filtered_grads_idx = create_hop_fw_bw( - fw_gm, *args - ) - return LocalMapAutogradOp.apply( - fw_gm, bw_gm, num_fw_ins, num_fw_outs, filtered_grads_idx, *args, **kwargs - ) - - return fw_gm(*args, **kwargs) - - -@local_map_hop.py_functionalize_impl -def functional_mode_key( - ctx: Any, fw_gm: GraphModule, *args: Any, **kwargs: Any -) -> tuple[torch.Tensor]: - assert not kwargs - - unwrapped_inputs = ctx.unwrap_tensors(args) - with ctx.redispatch_to_next(): - out = local_map_hop(fw_gm, *unwrapped_inputs) - return ctx.wrap_tensors(out) - - -@local_map_hop.py_impl(FakeTensorMode) -def fake_mode_key( - mode: FakeTensorMode, - fw_gm: GraphModule, - *args: Any, - **kwargs: Any, -) -> tuple[torch.Tensor]: - with mode: - return fw_gm(*args, **kwargs) - - -def proxy_mode_key_common( - call_hop: Callable[..., Any], - proxy_mode: ProxyTorchDispatchMode, - gm: GraphModule, - *args: Any, - **kwargs: Any, -) -> tuple[torch.Tensor]: - assert proxy_mode is not None, ( - "Mode should always be enabled for python fallback key" - ) - assert len(kwargs) == 0 - - example_out = call_hop(*args, **kwargs) - proxy_args = pytree.tree_map(proxy_mode.tracer.unwrap_proxy, args) # type: ignore[union-attr] - - out_proxy = proxy_mode.tracer.create_proxy( - "call_function", call_hop, proxy_args, {} - ) - - # extract local_map args, post-dispatch operates on GraphModules - assert gm.meta["local_map_kwargs"] - local_map_kwargs = gm.meta["local_map_kwargs"] - - # propagate local_map args to the call_function node - out_proxy.node.meta["local_map_kwargs"] = local_map_kwargs - return track_tensor_tree( - example_out, out_proxy, constant=None, tracer=proxy_mode.tracer - ) - - -@local_map_hop.py_impl(ProxyTorchDispatchMode) -def proxy_mode_key( - proxy_mode: ProxyTorchDispatchMode, - fw_gm: GraphModule, - *args: Any, - **kwargs: Any, -) -> tuple[torch.Tensor]: - # TODO: get rid of this when we can install as a subgraph - def call_local_map(*_args: Any, **_kwargs: Any) -> Any: - return functools.partial(local_map_hop, fw_gm)(*_args, **_kwargs) - - return proxy_mode_key_common(call_local_map, proxy_mode, fw_gm, *args, **kwargs) - - -# Running HOP in eager with real tensors -@local_map_hop.py_impl(DispatchKey.CompositeExplicitAutograd) -def real_impl( - fw_gm: GraphModule, - *args: Any, - **kwargs: Any, -) -> tuple[torch.Tensor]: - return fw_gm(*args, **kwargs) diff --git a/torch/distributed/tensor/_ops/_view_ops.py b/torch/distributed/tensor/_ops/_view_ops.py index 80a0491f694cc..62e8c68e9be9d 100644 --- 
a/torch/distributed/tensor/_ops/_view_ops.py +++ b/torch/distributed/tensor/_ops/_view_ops.py @@ -490,9 +490,7 @@ def propagate_shape_and_sharding( - An output dimension that is a split of the input dimension can only be sharded if the leftmost split size is divisible by the mesh dimension """ - assert len(input_src_placements) == len(mesh_sizes), ( - f"{input_src_placements} != {mesh_sizes}" - ) + assert len(input_src_placements) == len(mesh_sizes) # for each input dim, for each mesh dim, provides a list of possible shardable dimensions mesh_ndim = len(mesh_sizes) shardable_dims: dict[int, list[bool]] = {} diff --git a/torch/testing/_internal/hop_db.py b/torch/testing/_internal/hop_db.py index a927bbaa42f4d..2a0883408892f 100644 --- a/torch/testing/_internal/hop_db.py +++ b/torch/testing/_internal/hop_db.py @@ -212,31 +212,6 @@ def body_fn(iter_t, x): return torch._higher_order_ops.while_loop_stack_output(cond_fn, body_fn, (iter_t, x), tuple()) -def sample_inputs_local_map_hop(opinfo, device, dtype, requires_grad, **kwargs): - # TODO: once HOPs support DTensor inputs, we should also test DTensors - make_arg = functools.partial( - make_tensor, device=device, dtype=dtype, requires_grad=False - ) - yield SampleInput( - make_arg(2, 3, 4, low=0.1, high=2), - make_arg(2, 3, 4, low=0.1, high=2), - ) - - -def simple_local_map_hop(inp1, inp2): - def body_gm(inp1, inp2): - return inp1.cos() + inp2.sin() - gm = torch.fx.symbolic_trace(body_gm) - - assert torch.distributed.is_available() - from torch.distributed.tensor.placement_types import Replicate - gm.meta["local_map_kwargs"] = { - "in_placements": (Replicate(), Replicate(), Replicate()), - "out_placements": ((Replicate(), Replicate(), Replicate()),) - } - - return torch._higher_order_ops.local_map_hop(gm, inp1, inp2) - def sample_inputs_scan(opinfo, device, dtype, requires_grad, **kwargs): make_arg = functools.partial( make_tensor, device=device, dtype=dtype, requires_grad=requires_grad @@ -476,17 +451,4 @@ def fn(x): ), decorators=[onlyCUDA], ), - OpInfo( - name="local_map_hop", - variant_test_name="simple", - op=simple_local_map_hop, - sample_inputs_func=sample_inputs_local_map_hop, - dtypes=custom_types(torch.float16, torch.float32), - supports_out=False, - check_batched_grad=False, - check_batched_gradgrad=False, - check_batched_forward_grad=False, - check_inplace_batched_forward_grad=False, - decorators=[onlyCUDA, unittest.skipIf(not torch.distributed.is_available(), "requires distributed build")], - ), ] From 3ee071aa85beac542f3b8caa4696546073233001 Mon Sep 17 00:00:00 2001 From: James Wu Date: Mon, 15 Sep 2025 21:00:37 -0700 Subject: [PATCH 301/693] Allow aot_module_simplified to return a serializable output (#162527) This PR refactors AOTAutograd slightly: - It adds `simple_wraps` to various wrappers so that the reference to inner functions is stored in the output of AOTAutograd. - It saves a `serialize()` method on the result of `aot_stage2`, in the event of an eager backward compile. I discussed the lazy backward case with @bdhirsh, and we agreed that serialization in that case would probably use a different, more AOT API anyway, so we do not implement a serialize function for the lazy backward case. AOT precompile, at least initially, will always eagerly compile the backward. 
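For reference, the intended serialization round trip looks roughly like this. This is a sketch based on the new tests below; the `_artifacts` attribute and the `serialize_compile_artifacts` / `deserialize_compile_artifacts` classmethods are internal details, and the calling convention of the restored callable is illustrative rather than a stable API:

```python
import torch
from torch._dynamo.aot_compile import BundledAOTAutogradSerializableCallable

def fn(x):
    return x.sin()

# Ahead-of-time compile an inductor-backed function
compiled = torch.compile(fn, fullgraph=True, backend="inductor").aot_compile(
    ((torch.randn(3, 3),), {})
)

# The inductor result is wrapped in a callable that knows how to serialize itself
backend_result = compiled._artifacts.compiled_fn
assert isinstance(backend_result, BundledAOTAutogradSerializableCallable)

# Bytes out ...
blob = BundledAOTAutogradSerializableCallable.serialize_compile_artifacts(backend_result)

# ... and a working callable back in (e.g. in a fresh process); the direct call
# here assumes the restored wrapper keeps the plain calling convention
restored = BundledAOTAutogradSerializableCallable.deserialize_compile_artifacts(blob)
out = restored(torch.randn(3, 3))
```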
Pull Request resolved: https://github.com/pytorch/pytorch/pull/162527 Approved by: https://github.com/zhxchen17 ghstack dependencies: #162171 --- test/dynamo/test_aot_compile.py | 34 +++++++++- torch/_dynamo/aot_compile.py | 45 ++++--------- .../_aot_autograd/autograd_cache.py | 64 ++++++++++++------- .../_functorch/_aot_autograd/graph_compile.py | 18 +++++- .../_aot_autograd/runtime_wrappers.py | 32 +++++++++- torch/_functorch/_aot_autograd/utils.py | 1 + torch/_functorch/aot_autograd.py | 13 ++++ 7 files changed, 147 insertions(+), 60 deletions(-) diff --git a/test/dynamo/test_aot_compile.py b/test/dynamo/test_aot_compile.py index 9c72c86fef5c1..194bd2093c1f2 100644 --- a/test/dynamo/test_aot_compile.py +++ b/test/dynamo/test_aot_compile.py @@ -10,7 +10,7 @@ import torch._inductor.test_case import torch.onnx.operators import torch.utils.cpp_extension -from torch._dynamo.aot_compile import ModelInput +from torch._dynamo.aot_compile import ModelInput, SerializableCallable from torch._dynamo.exc import PackageError, Unsupported from torch._dynamo.package import DynamoCache from torch._dynamo.precompile_context import PrecompileContext @@ -307,6 +307,38 @@ def eval_mode(model): model.train() expected.sum().backward() + def test_aot_module_simplified_serializable_autograd(self): + mod = SimpleLinearModule() + compiled_fn: SerializableCallable = torch.compile( + mod, fullgraph=True, backend="inductor" + ).forward.aot_compile(((torch.randn(3, 3),), {})) + backend_result = compiled_fn._artifacts.compiled_fn + self.assertTrue( + isinstance( + backend_result, + torch._dynamo.aot_compile.BundledAOTAutogradSerializableCallable, + ) + ) + assert hasattr(backend_result.compiled_fn, "serialize") + self.assertIsNotNone(backend_result.compiled_fn.serialize) + + def test_aot_module_simplified_serializable_inference(self): + def fn(x): + return x.sin() + + compiled_fn: SerializableCallable = torch.compile( + fn, fullgraph=True, backend="inductor" + ).aot_compile(((torch.randn(3, 3),), {})) + backend_result = compiled_fn._artifacts.compiled_fn + self.assertTrue( + isinstance( + backend_result, + torch._dynamo.aot_compile.BundledAOTAutogradSerializableCallable, + ) + ) + assert hasattr(backend_result.compiled_fn, "serialize") + self.assertIsNotNone(backend_result.compiled_fn.serialize) + if __name__ == "__main__": from torch._dynamo.test_case import run_tests diff --git a/torch/_dynamo/aot_compile.py b/torch/_dynamo/aot_compile.py index c93f9cc397f03..9f668005bce20 100644 --- a/torch/_dynamo/aot_compile.py +++ b/torch/_dynamo/aot_compile.py @@ -13,7 +13,7 @@ import torch import torch.fx from torch._dynamo.graph_utils import _graph_uses_non_cpu -from torch._dynamo.precompile_context import PrecompileContext, SystemInfo +from torch._dynamo.precompile_context import SystemInfo from . import convert_frame from .hooks import Hooks @@ -145,14 +145,13 @@ class BundledAOTAutogradSerializableCallable(SerializableCallable): We'll do that refactor in a later PR. """ - def __init__(self, artifact: Any) -> None: + def __init__(self, compiled_fn: Any) -> None: """ Takes in a BundledAOTAutogradCacheArtifact, which is the serialized form of a compiled function generated by AOTAutograd. 
""" - - self.compiled_fn = artifact.after_deserialization() - self.data = artifact.content + assert hasattr(compiled_fn, "serialize") + self.compiled_fn = compiled_fn def __getattr__(self, attr: Any) -> Any: if hasattr(self, attr): @@ -160,35 +159,22 @@ def __getattr__(self, attr: Any) -> Any: else: return getattr(self.compiled_fn, attr) - @classmethod - def from_backend_id( - cls, backend_id: str - ) -> "BundledAOTAutogradSerializableCallable": - """ - Takes in a backend_id, and returns a BundledAOTAutogradSerializableCallable - that wraps around the compiled function generated by AOTAutograd. - """ - artifact = PrecompileContext.serialize_artifact_by_key(backend_id) - if artifact is None: - raise RuntimeError("No artifact found for backend_id: " + backend_id) - return cls(artifact) - @classmethod def serialize_compile_artifacts( cls, fn: "BundledAOTAutogradSerializableCallable" ) -> bytes: - return fn.data + with torch._functorch.config.patch("bundled_autograd_cache", True): + result = pickle.dumps(fn.compiled_fn.serialize()) + return result @classmethod def deserialize_compile_artifacts(cls, data: bytes) -> Any: from torch._functorch._aot_autograd.autograd_cache import ( - BundledAOTAutogradCacheArtifact, + deserialize_bundled_cache_entry, ) - # The key in the artifact is not important here since we're not populating a cache, - # we just want to grab the callable back out of the serialized entry - artifact = BundledAOTAutogradCacheArtifact("", data) - return cls(artifact) + compiled_fn = deserialize_bundled_cache_entry(data) + return cls(compiled_fn) def __call__(self, *args: Any, **kwargs: Any) -> Any: return self.compiled_fn(*args, **kwargs) @@ -284,13 +270,10 @@ def new_guard_filter_fn( compiled_fn = backend( backend_input.graph_module, backend_input.example_inputs ) - - # If Inductor backend is used, grab the compiled_fn from PrecompileContext - # TODO: this should be replaced once we make the backend return the SerializableCallable directly. - if isinstance(backend, torch._TorchCompileInductorWrapper): - compiled_fn = BundledAOTAutogradSerializableCallable.from_backend_id( - backend_input.backend_id - ) + # If Inductor backend is used, grab the compiled_fn from PrecompileContext + # TODO: this should be replaced once we make the backend return the SerializableCallable directly. 
+ if isinstance(backend, torch._TorchCompileInductorWrapper): + compiled_fn = BundledAOTAutogradSerializableCallable(compiled_fn) if not isinstance(compiled_fn, SerializableCallable): if hasattr(backend, "compiler_fn"): diff --git a/torch/_functorch/_aot_autograd/autograd_cache.py b/torch/_functorch/_aot_autograd/autograd_cache.py index 2ac1e0d34d088..17c925eeaf9bd 100644 --- a/torch/_functorch/_aot_autograd/autograd_cache.py +++ b/torch/_functorch/_aot_autograd/autograd_cache.py @@ -71,9 +71,11 @@ FunctionalizedRngRuntimeWrapper, post_compile, RuntimeWrapper, + SerializableCompiledFunction, SubclassMeta, ) from .schemas import AOTAutogradCacheInfo, AOTConfig, ViewAndMutationMeta # noqa: F401 +from .utils import simple_wraps if TYPE_CHECKING: @@ -963,6 +965,7 @@ def wrap_post_compile( fw_metadata=self.runtime_metadata, try_save_cache_entry=None, ) + else: compiled_function = RuntimeWrapper( indices_of_inps_to_detach=self.indices_of_inps_to_detach, @@ -972,6 +975,11 @@ def wrap_post_compile( compiled_fw_func, aot_config, runtime_metadata=self.runtime_metadata ) + # Add serialization function back onto object + compiled_function = SerializableCompiledFunction( + compiled_function, lambda: self + ) + compiled_function, _ = post_compile( self.dispatch_wrappers, compiled_function, @@ -1051,6 +1059,37 @@ def type(): return "aot_autograd" +def deserialize_bundled_cache_entry(data: bytes) -> Callable: + entry = pickle.loads(data) + # In the precompile use case, guards are already serialized + # by dynamo, so we don't need to add them to the environment + entry.guards_expr = None + # TODO: this isn't exactly right, because cudagraphs needs to be a shared config + # which is set by compile_fx. But in precompile, we never actually call compile_fx + # so we don't have a place to track cudagraphs here. + cudagraphs = torch._inductor.config.triton.cudagraphs + boxed_forward_device_index = BoxedDeviceIndex(None) + compiled_fn = entry.wrap_post_compile( + [], + entry.sanitized_aot_config, + { + "cudagraphs": cudagraphs, + "boxed_forward_device_index": boxed_forward_device_index, + }, + ) + + # TODO: this ignores flat_params, which can exist + # if inline_builtin_nn_modules=False + @simple_wraps(compiled_fn) + def forward(*runtime_args: tuple[Any]): + return compiled_fn(list(runtime_args)) + + assert hasattr(compiled_fn, "serialize") + forward.serialize = compiled_fn.serialize # type: ignore[attr-defined] + + return forward + + @CacheArtifactFactory.register class BundledAOTAutogradCacheArtifact(PrecompileCacheArtifact[Callable]): @override @@ -1060,30 +1099,7 @@ def type(): @override def after_deserialization(self) -> Callable: - entry = pickle.loads(self.content) - # In the precompile use case, guards are already serialized - # by dynamo, so we don't need to add them to the environment - entry.guards_expr = None - # TODO: this isn't exactly right, because cudagraphs needs to be a shared config - # which is set by compile_fx. But in precompile, we never actually call compile_fx - # so we don't have a place to track cudagraphs here. 
- cudagraphs = torch._inductor.config.triton.cudagraphs - boxed_forward_device_index = BoxedDeviceIndex(None) - compiled_fn = entry.wrap_post_compile( - [], - entry.sanitized_aot_config, - { - "cudagraphs": cudagraphs, - "boxed_forward_device_index": boxed_forward_device_index, - }, - ) - - # TODO: this ignores flat_params, which can exist - # if inline_builtin_nn_modules=False - def forward(*runtime_args: tuple[Any]): - return compiled_fn(list(runtime_args)) - - return forward + return deserialize_bundled_cache_entry(self.content) class AOTAutogradCache(GuardedCache[GenericAOTAutogradCacheEntry]): diff --git a/torch/_functorch/_aot_autograd/graph_compile.py b/torch/_functorch/_aot_autograd/graph_compile.py index d02d29cba199b..2ae1263c3ae9a 100644 --- a/torch/_functorch/_aot_autograd/graph_compile.py +++ b/torch/_functorch/_aot_autograd/graph_compile.py @@ -51,6 +51,7 @@ from .. import config from .autograd_cache import ( AOTAutogradCache, + GenericAOTAutogradCacheEntry, serialize_graph_module, should_bundle_autograd_cache, should_use_remote_autograd_cache, @@ -73,6 +74,7 @@ post_compile, pre_compile, RuntimeWrapper, + SerializableCompiledFunction, ) from .schemas import ( AOTConfig, @@ -363,6 +365,7 @@ def should_save_cache(): AOTAutogradCache.save( cache_info.cache_key, entry, remote=should_use_remote_autograd_cache() ) + compiled_fw = SerializableCompiledFunction(compiled_fw, lambda: entry) compiled_fw = fakified_out_wrapper.post_compile( compiled_fw, @@ -1315,7 +1318,8 @@ def _log_structured_logs(): def aot_stage2_autograd( - aot_state: AOTState, aot_graph_capture: AOTGraphCapture + aot_state: AOTState, + aot_graph_capture: AOTGraphCapture, ) -> DispatchReturn: """ Autograd logic. Generates a joint graph, partitions it, manipulates the input with various wrappers, @@ -1832,6 +1836,7 @@ def aot_stage2_autograd( make_runtime_safe(fw_metadata, maybe_subclass_meta) try_save_cache_entry: Optional[Callable] = None + entry: Optional[GenericAOTAutogradCacheEntry] = None if aot_config.cache_info is not None: forward_time_taken_ns = time.time_ns() - aot_config.cache_info.start_time_ns @@ -1844,7 +1849,7 @@ def try_save_cache_entry( # noqa: F811 bw_module: torch.fx.GraphModule, _fw_metadata: ViewAndMutationMeta, aot_config: AOTConfig, - ): + ) -> Optional[GenericAOTAutogradCacheEntry]: cache_info = aot_config.cache_info def should_save_cache(): @@ -1891,10 +1896,14 @@ def should_save_cache(): ) remote = should_use_remote_autograd_cache() AOTAutogradCache.save(cache_info.cache_key, entry, remote) + return entry + return None if compiled_bw_func is not None: # If we already compiled the backward, we save its cache entry now - try_save_cache_entry(compiled_bw_func, bw_module, fw_metadata, aot_config) + entry = try_save_cache_entry( + compiled_bw_func, bw_module, fw_metadata, aot_config + ) try_save_cache_entry = None compiled_fn = AOTDispatchAutograd.post_compile( @@ -1911,6 +1920,9 @@ def should_save_cache(): try_save_cache_entry=try_save_cache_entry, ) + if entry is not None: + compiled_fn = SerializableCompiledFunction(compiled_fn, lambda: entry) + if config.debug_assert: flat_requires_grad: list[Optional[bool]] = [ a.requires_grad if isinstance(a, Tensor) else None for a in flat_args diff --git a/torch/_functorch/_aot_autograd/runtime_wrappers.py b/torch/_functorch/_aot_autograd/runtime_wrappers.py index f1cce86403209..5a5536913813c 100644 --- a/torch/_functorch/_aot_autograd/runtime_wrappers.py +++ b/torch/_functorch/_aot_autograd/runtime_wrappers.py @@ -11,6 +11,7 @@ import collections import 
contextlib import copy +import functools import itertools import pprint from contextlib import AbstractContextManager, nullcontext @@ -307,6 +308,7 @@ def record_runtime_wrapper_prologue_exit( if cm is not None: cm.__exit__(None, None, None) + @simple_wraps(compiled_fn) def runtime_wrapper(args: list[Any]): # Create context manager for profiler cm = record_runtime_wrapper_prologue_enter() @@ -465,6 +467,7 @@ def runtime_wrapper(args: list[Any]): return runtime_wrapper # Disabling saved tensors hooks + @simple_wraps(runtime_wrapper) def _runtime_wrapper(*args, **kwargs): with _disable_saved_tensors_hooks(): return runtime_wrapper(*args, **kwargs) @@ -1929,6 +1932,33 @@ def _disable_saved_tensors_hooks(): ) +@dataclass +class SerializableCompiledFunction: + """ + Represents a result of AOTDispatch after calling the inner compiler + that can be serialized + """ + + compiled_fn: Callable + serialize_fn: Callable + + def __init__(self, compiled_fn: Callable, serialize_fn: Callable): + self.compiled_fn = compiled_fn + self.serialize_fn = serialize_fn + # Equivalent to functools.wraps + functools.update_wrapper( + self, + compiled_fn, + assigned=("__doc__", "__annotations__", "__type_params__"), + ) + + def serialize(self) -> Any: + return self.serialize_fn() + + def __call__(self, *args, **kwargs): + return self.compiled_fn(*args, **kwargs) + + # This is wrapped in a class just for namespacing purposes # No need to make it into an actual CompilerWrapper because it doesn't fit the abstract as cleanly class AOTDispatchAutograd: @@ -2037,7 +2067,7 @@ def post_compile( aot_config: AOTConfig, *, fw_metadata: ViewAndMutationMeta, # runtime metadata - try_save_cache_entry: Optional[Callable], # Save cache entry after compilation + try_save_cache_entry: Optional[Callable], # Serialization function ): # For additional context see Note [CUDA Graph Safe RNG Functionalization] # Each pair forward, backward rng states must be equal prior to its invocation on any diff --git a/torch/_functorch/_aot_autograd/utils.py b/torch/_functorch/_aot_autograd/utils.py index f028b63b3a8c7..8f6c7d1478e22 100644 --- a/torch/_functorch/_aot_autograd/utils.py +++ b/torch/_functorch/_aot_autograd/utils.py @@ -99,6 +99,7 @@ def _get_autocast_states(): def make_boxed_func(f): + @simple_wraps(f) def g(args): return f(*args) diff --git a/torch/_functorch/aot_autograd.py b/torch/_functorch/aot_autograd.py index 3215472292346..2b0df0be370ab 100644 --- a/torch/_functorch/aot_autograd.py +++ b/torch/_functorch/aot_autograd.py @@ -106,6 +106,7 @@ from ._aot_autograd.runtime_wrappers import ( # noqa: F401 AOTDedupeWrapper, AOTSyntheticBaseWrapper, + SerializableCompiledFunction, ) from ._aot_autograd.schemas import ( # noqa: F401 AOTConfig, @@ -1111,6 +1112,7 @@ def aot_module_simplified( # the inputs so that they can be freed before the end of this scope. # For overhead reasons, this is not the default wrapper, see comment: # https://github.com/pytorch/pytorch/pull/122535/files#r1560096481 + @simple_wraps(compiled_fn) def forward(runtime_args: list[Any]): flat_args = [] flat_args.extend(params_buffers_flat) @@ -1124,6 +1126,7 @@ def forward(runtime_args: list[Any]): # historically returned a function that was not the boxed calling # convention. This should get fixed... 
# NB: GraphModule/nn.Module rely on the non-boxed calling convention here + @simple_wraps(compiled_fn) def forward(*runtime_args: tuple[Any]): full_args = [] full_args.extend(params_buffers_flat) @@ -1135,6 +1138,16 @@ def forward(*runtime_args: tuple[Any]): forward.named_parameters = mod.named_parameters forward.named_buffers = mod.named_buffers + # Add a serialize function + def grab_serialize_fn(fn): + if isinstance(fn, SerializableCompiledFunction): + return fn.serialize_fn + elif hasattr(fn, "__wrapped__"): + return grab_serialize_fn(fn.__wrapped__) + else: + return None + + forward.serialize = grab_serialize_fn(forward) # type: ignore[attr-defined] return forward From f638854e1da6b33e78dcc9f3e28c98c4cdce4e86 Mon Sep 17 00:00:00 2001 From: Prachi Gupta Date: Tue, 16 Sep 2025 15:35:35 +0000 Subject: [PATCH 302/693] [ROCm][SymmMem] re-enable UTs (#162811) After the UT suite moved to `MultiProcContinuousTest`, `skipIfRocm` decorator started failing rather than skipping UTs because now we spawn multiple threads before the skip decorator is taken into account and the skip decorator was raising an exception to exit the process. But, the parent process treated the child process exiting as a crash rather than a skip. Additionally, in `MultiProcContinuousTest`, if one UT fails all subsequent ones are also skipped which makes sense since there's one setup for the entire suite. However, this showed up as many failing/skipped UTs in the parity. I added multiprocess version of skip decorators for ROCm, including, `skip_if_rocm_arch_multiprocess` and `skip_if_rocm_ver_lessthan_multiprocess`. These are needed as symmetric memory feature is only supported on MI300 onwards and we need to skip them for other archs and some UTs only work after ROCm7.0. Fixes #161249 Fixes #161187 Fixes #161078 Fixes #160989 Fixes #160881 Fixes #160768 Fixes #160716 Fixes #160665 Fixes #160621 Fixes #160549 Fixes #160506 Fixes #160445 Fixes #160347 Fixes #160203 Fixes #160177 Fixes #160049 Fixes #159921 Fixes #159764 Fixes #159643 Fixes #159499 Fixes #159397 Fixes #159396 Fixes #159347 Fixes #159067 Fixes #159066 Fixes #158916 Fixes #158760 Fixes #158759 Fixes #158422 Fixes #158138 Fixes #158136 Fixes #158135 Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/162811 Approved by: https://github.com/jeffdaily --- test/distributed/test_symmetric_memory.py | 97 +++++++++++++------ torch/testing/_internal/common_distributed.py | 62 ++++++++++-- torch/testing/_internal/common_utils.py | 2 +- 3 files changed, 123 insertions(+), 38 deletions(-) diff --git a/test/distributed/test_symmetric_memory.py b/test/distributed/test_symmetric_memory.py index e512b37432fbb..d4fc073d1d6f3 100644 --- a/test/distributed/test_symmetric_memory.py +++ b/test/distributed/test_symmetric_memory.py @@ -27,18 +27,18 @@ from torch.testing._internal.common_distributed import ( MultiProcContinuousTest, MultiProcessTestCase, + PLATFORM_SUPPORTS_SYMM_MEM, requires_multicast_support, skip_if_lt_x_gpu, + skip_if_rocm_multiprocess, + skip_if_rocm_ver_lessthan_multiprocess, ) from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, - MI300_ARCH, parametrize, requires_cuda, requires_cuda_p2p_access, run_tests, - runOnRocmArch, - skipIfRocm, TEST_WITH_ROCM, TestCase, ) @@ -67,7 +67,9 @@ def test_has_multicast_support(self) -> None: self.assertFalse(_SymmetricMemory.has_multicast_support(DeviceType.CPU, 0)) # NOTE: DeviceType.CUDA is implicitly tested through @requires_multicast_support - @skipIfRocm + 
@skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(2) def test_get_backend(self) -> None: backend = symm_mem.get_backend(torch.device("cuda")) @@ -75,7 +77,7 @@ def test_get_backend(self) -> None: backend = symm_mem.get_backend("cuda") self.assertIsNotNone(backend) - @skipIfRocm + @skip_if_rocm_multiprocess @skip_if_lt_x_gpu(2) def test_cuda_nvlink_connectivity_detection(self) -> None: from torch._C._distributed_c10d import _detect_dma_connectivity @@ -87,12 +89,16 @@ def test_cuda_nvlink_connectivity_detection(self) -> None: for row in connectivity.matrix: self.assertEqual(len(row), torch.cuda.device_count()) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) def test_large_alloc(self) -> None: t = symm_mem.empty(2 * 1024**3, dtype=torch.uint8, device="cuda") self.assertEqual(t.numel() * t.element_size(), 2 * 1024**3) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(2) def test_get_signal_pad(self) -> None: self._init_process() @@ -133,7 +139,9 @@ def test_get_signal_pad(self) -> None: t.fill_(0) self.assertTrue(signal_pad.eq(42).all()) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @requires_cuda def test_allow_overlapping_devices(self) -> None: os.environ["TORCH_SYMM_MEM_ALLOW_OVERLAPPING_DEVICES"] = "1" @@ -152,7 +160,9 @@ def test_allow_overlapping_devices(self) -> None: os.environ["TORCH_SYMM_MEM_ALLOW_OVERLAPPING_DEVICES"] = "0" - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(2) @parametrize("symm_mem_input", [True, False]) def test_low_contention_all_gather(self, symm_mem_input: bool) -> None: @@ -177,7 +187,9 @@ def test_low_contention_all_gather(self, symm_mem_input: bool) -> None: for r in range(self.world_size): self.assertTrue(chunks[r].eq(r).all()) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(2) @parametrize("reduce_op", ["sum", "avg"]) @parametrize("symm_mem_input", [True, False]) @@ -213,7 +225,9 @@ def test_low_contention_reduce_scatter( raise AssertionError(f"Unexpected reduce_op: {reduce_op}") self.assertTrue(res.eq(expect).all()) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(4) def test_subgroup(self) -> None: self._init_process() @@ -270,7 +284,9 @@ def _init_process(self): torch.set_deterministic_debug_mode("warn") torch.utils.deterministic.fill_uninitialized_memory = True - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(2) @parametrize("gather_dim", [0, 1]) def test_fused_all_gather_matmul(self, gather_dim: int) -> None: @@ -300,7 +316,7 @@ def test_fused_all_gather_matmul(self, gather_dim: int) -> None: assert torch.allclose(mm_output_0, mm_output_1) assert mm_output_0.stride(), mm_output_1.stride() - @skipIfRocm # this requires async_input_mm support + @skip_if_rocm_multiprocess # this requires async_input_mm support @skipIf( not SM90OrLater, "_fused_all_gather_matmul_native currently only supports sm>=90", @@ -397,7 +413,9 @@ def test_multimem_all_gather_matmul(self) -> None: 
torch.testing.assert_close(ag_target, ag_baseline) torch.testing.assert_close(mm_target[0], mm_baseline[0]) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(2) @parametrize("gather_dim", [0, 1]) @parametrize( @@ -483,7 +501,9 @@ def test_fused_all_gather_scaled_matmul( self.assertEqual(mm_output_0.stride(), mm_output_1.stride()) self.assertEqual(mm_output_0.dtype, mm_output_1.dtype) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(2) @parametrize("scatter_dim", [0, 1]) def test_fused_matmul_reduce_scatter(self, scatter_dim: int) -> None: @@ -510,7 +530,7 @@ def test_fused_matmul_reduce_scatter(self, scatter_dim: int) -> None: assert torch.allclose(output_0, output_1) assert output_0.stride() == output_1.stride() - @skipIfRocm # AsyncTP support changed _fused_scaled_matmul_reduce_scatter_fallback API, need more changes + @skip_if_rocm_multiprocess # AsyncTP support changed _fused_scaled_matmul_reduce_scatter_fallback API, need more changes @skip_if_lt_x_gpu(2) @parametrize("scatter_dim", [0, 1]) @parametrize("rowwise", [True, False]) @@ -560,7 +580,9 @@ def test_fused_scaled_matmul_reduce_scatter( assert outputs[0].stride() == outputs[1].stride() self.assertEqual(outputs[0], outputs[1]) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @parametrize("dim", [0, 1, 2]) def test_optimal_layout(self, dim: int) -> None: t = torch.rand(8, 64, 32, 16) @@ -644,7 +666,9 @@ def _verify_symmetric_memory(self, symm_mem_hdl): symm_mem_hdl.barrier() - @skipIfRocm + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(2) @parametrize("set_device", [True, False]) def test_empty_strided_p2p(self, set_device: bool) -> None: @@ -663,7 +687,10 @@ def test_empty_strided_p2p(self, set_device: bool) -> None: del t self._verify_symmetric_memory(symm_mem_hdl) - @skipIfRocm # started failing during ROCm 6.4 CI upgrade + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) + @skip_if_rocm_ver_lessthan_multiprocess((7, 0)) @skip_if_lt_x_gpu(2) @parametrize("set_device", [True, False]) def test_empty_strided_p2p_persistent(self, set_device: bool) -> None: @@ -732,7 +759,7 @@ def _init_process(self): # the linux kernel to create a core dump of the host application. The functionality # is there, meaning timeout is happening correctly. However, there isn't a nice way # to test it as the current executing thread will coredump and exit. - @skipIfRocm + @skip_if_rocm_multiprocess @skip_if_lt_x_gpu(2) def test_barrier_timeout(self) -> None: self._init_process() @@ -758,7 +785,7 @@ def test_barrier_timeout(self) -> None: # the linux kernel to create a core dump of the host application. The functionality # is there, meaning timeout is happening correctly. However, there isn't a nice way # to test it as the current executing thread will coredump and exit. - @skipIfRocm + @skip_if_rocm_multiprocess @skip_if_lt_x_gpu(2) def test_put_signal_timeout(self) -> None: self._init_process() @@ -787,7 +814,7 @@ def test_put_signal_timeout(self) -> None: # the linux kernel to create a core dump of the host application. The functionality # is there, meaning timeout is happening correctly. However, there isn't a nice way # to test it as the current executing thread will coredump and exit. 
- @skipIfRocm + @skip_if_rocm_multiprocess @skip_if_lt_x_gpu(2) def test_wait_signal_timeout(self) -> None: self._init_process() @@ -876,7 +903,9 @@ def test_multimem_one_shot_all_reduce( gathered_inps.sum(dim=0), res, rtol=1e-03, atol=1e-05 ) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(4) def test_one_shot_all_reduce(self) -> None: self._init_process() @@ -907,7 +936,9 @@ def test_one_shot_all_reduce(self) -> None: ) self._verify_all_reduce_result(local_inp if copy else inp[offset:], res) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(4) def test_two_shot_all_reduce(self) -> None: self._init_process() @@ -957,7 +988,9 @@ def _verify_all_reduce_result(self, inp, res): gathered_inps.sum(dim=0), res, rtol=1e-01, atol=1e-01 ) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(4) def test_reduce_scatter(self) -> None: self._init_process() @@ -994,7 +1027,9 @@ def test_reduce_scatter(self) -> None: self.assertTrue(t[shift + numel :].eq(0).all().item()) self._verify_reduce_scatter_result(inp, out) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @skip_if_lt_x_gpu(4) def test_reduce_scatter_corner_cases(self) -> None: self._init_process() @@ -1070,7 +1105,7 @@ def device(self) -> torch.device: return torch.device(device_type, self.rank) @skip("Fails with 'one_shot_all_reduce' not found in AOT graph, TODO: fix") - @skipIfRocm # requires registered-buffer support + @skip_if_rocm_multiprocess # requires registered-buffer support @skip_if_lt_x_gpu(2) @fresh_cache() def test_lowering_one_shot_all_reduce(self): @@ -1130,7 +1165,9 @@ class SymmMemSingleProcTest(TestCase): not TEST_WITH_ROCM and _get_torch_cuda_version() < (12, 0), "stream_write_value32 currently only supports cuda version>=12.0", ) - @runOnRocmArch(MI300_ARCH) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) def test_stream_write_value32(self): tensor = torch.zeros(4, dtype=torch.uint32, device="cuda") expect = torch.tril(torch.ones(4, 4, device="cuda")).to(torch.uint32) @@ -1145,8 +1182,10 @@ def test_stream_write_value32(self): with self.assertRaises(RuntimeError): _SymmetricMemory.stream_write_value32(tensor, offset=0, val=4294967296) + @skipIf( + not PLATFORM_SUPPORTS_SYMM_MEM, "SymmMem is not supported on this ROCm arch" + ) @requires_cuda - @runOnRocmArch(MI300_ARCH) def test_memset32(self): t = _SymmetricMemory.empty_strided_p2p( (64,), diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py index c1f75697fe889..5b7c368232946 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py @@ -36,6 +36,7 @@ FILE_SCHEMA, find_free_port, IS_SANDCASTLE, + LazyVal, retry_on_connect_failures, skip_but_pass_in_sandcastle, skip_but_pass_in_sandcastle_if, @@ -421,17 +422,62 @@ def requires_multicast_support(): ) +def evaluate_platform_supports_symm_mem(): + if TEST_WITH_ROCM: + arch_list = ["gfx942", "gfx950"] + for arch in arch_list: + if arch in torch.cuda.get_device_properties(0).gcnArchName: + return True + if TEST_CUDA: + return True + + return False + + +PLATFORM_SUPPORTS_SYMM_MEM: bool = LazyVal( + lambda: evaluate_platform_supports_symm_mem() 
+) + + def skip_if_rocm_multiprocess(func): - """Skips a test for ROCm""" - func.skip_if_rocm_multiprocess = True + """Skips a test for ROCm multiprocess UTs""" + return unittest.skipIf(TEST_WITH_ROCM, TEST_SKIPS["skipIfRocm"].message)(func) - @wraps(func) - def wrapper(*args, **kwargs): - if not TEST_WITH_ROCM: - return func(*args, **kwargs) - sys.exit(TEST_SKIPS["skipIfRocm"].exit_code) - return wrapper +def skip_if_rocm_arch_multiprocess(arch: tuple[str, ...]): + """Skips a test for given ROCm archs - multiprocess UTs""" + + def decorator(func): + prop = torch.cuda.get_device_properties(0).gcnArchName.split(":")[0] + arch_match = prop in arch + reason = None + if TEST_WITH_ROCM and arch_match: + reason = f"skip_if_rocm_arch_multiprocess: test skipped on {arch}" + + return unittest.skipIf(reason is not None, reason)(func) + + return decorator + + +def skip_if_rocm_ver_lessthan_multiprocess(version=None): + """Skips a test for ROCm based on ROCm ver - multiprocess UTs""" + + def decorator(func): + reason = None + if TEST_WITH_ROCM: + rocm_version = str(torch.version.hip) + rocm_version = rocm_version.split("-", maxsplit=1)[0] # ignore git sha + rocm_version_tuple = tuple(int(x) for x in rocm_version.split(".")) + if ( + rocm_version_tuple is None + or version is None + or rocm_version_tuple < tuple(version) + ): + reason = f"skip_if_rocm_ver_lessthan_multiprocess: ROCm {rocm_version_tuple} is available but {version} required" + + return unittest.skipIf(reason is not None, reason)(func) + + return decorator def skip_if_win32(): diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index e29f36020e9c2..cd0f26fe29f45 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -102,7 +102,7 @@ MI300_ARCH = ("gfx942",) - +MI200_ARCH = ("gfx90a") def freeze_rng_state(*args, **kwargs): return torch.testing._utils.freeze_rng_state(*args, **kwargs) From 9de22bc5da272d4e0d27303cc2ad06a597aa3539 Mon Sep 17 00:00:00 2001 From: Howard Huang Date: Tue, 16 Sep 2025 07:11:36 -0700 Subject: [PATCH 303/693] Inspect schedule IR comms (#162996) Small change to util to allow us to see comms (e.g. `SEND`, `RECV`, etc.) in the schedule IR Pull Request resolved: https://github.com/pytorch/pytorch/pull/162996 Approved by: https://github.com/fegin --- .../pipelining/_schedule_visualizer.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/torch/distributed/pipelining/_schedule_visualizer.py b/torch/distributed/pipelining/_schedule_visualizer.py index 81be2b178343b..1230adc35bde5 100644 --- a/torch/distributed/pipelining/_schedule_visualizer.py +++ b/torch/distributed/pipelining/_schedule_visualizer.py @@ -17,6 +17,7 @@ _Action, _ComputationType, _PipelineSchedule, + _PipelineScheduleRuntime, get_schedule_class, PipelineScheduleMulti, PipelineScheduleSingle, @@ -36,6 +37,7 @@ def get_schedule_ops( num_microbatches: int, num_stages_per_rank: Optional[int] = None, add_spacing: bool = False, + with_comms: bool = False, ) -> list[list[Optional[_Action]]]: """ Get all actions for a given schedule, pp_degree, and num_microbatches. The actions are returned in a list of lists @@ -43,6 +45,8 @@ def get_schedule_ops( The schedule can be specified as a string which is passed into get_schedule_class() or a _PipelineSchedule instance. 
""" + if add_spacing and with_comms: + raise ValueError("Cannot add spacing and view comms at the same time") if isinstance(schedule, str): schedule_class = get_schedule_class(schedule) @@ -78,11 +82,18 @@ def get_schedule_ops( # Instantiate the schedule class schedule_instance = schedule_class(stages, num_microbatches) + assert schedule_instance.pipeline_order is not None # Convert to List[List[_Action]] - all_actions = [] - for rank in range(pp_degree): - all_actions.append(schedule_instance.pipeline_order[rank]) + all_actions: list[list[Optional[_Action]]] = [] + if with_comms: + runtime = _PipelineScheduleRuntime(stages, num_microbatches) + runtime._prepare_schedule_with_comms(schedule_instance.pipeline_order) + for rank in range(pp_degree): + all_actions.append(list(runtime.pipeline_order_with_comms[rank])) + else: + for rank in range(pp_degree): + all_actions.append(schedule_instance.pipeline_order[rank]) # Add spacing if add_spacing: From b6a48ff69f82c00935eff7e2530e2551bfaf13e2 Mon Sep 17 00:00:00 2001 From: jiannanWang Date: Tue, 16 Sep 2025 17:01:02 +0000 Subject: [PATCH 304/693] [BE] Add Documentation for Device APIs (#162834) Added documentation for torch.cuda APIs. Fixed docstring for xpu and mtia is_bf16_supported API. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162834 Approved by: https://github.com/janeyx99 Co-authored-by: Jane (Yuan) Xu <31798555+janeyx99@users.noreply.github.com> --- docs/source/conf.py | 4 ---- docs/source/cuda.md | 2 ++ docs/source/mtia.md | 1 + docs/source/xpu.md | 1 + torch/cuda/__init__.py | 1 + torch/mtia/__init__.py | 1 + 6 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index fe43dae728757..264bf16a7c4a7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -210,10 +210,6 @@ coverage_ignore_functions = [ # torch "typename", - # torch.cuda - "check_error", - "cudart", - "is_bf16_supported", # torch.cuda._sanitizer "zip_arguments", "zip_by_key", diff --git a/docs/source/cuda.md b/docs/source/cuda.md index 24830cacdd4f6..8db30cfed7f3f 100644 --- a/docs/source/cuda.md +++ b/docs/source/cuda.md @@ -15,6 +15,7 @@ StreamContext can_device_access_peer + check_error current_blas_handle current_device current_stream @@ -34,6 +35,7 @@ init ipc_collect is_available + is_bf16_supported is_initialized is_tf32_supported memory_usage diff --git a/docs/source/mtia.md b/docs/source/mtia.md index 3229b80c3d91b..b0644a2ec6682 100644 --- a/docs/source/mtia.md +++ b/docs/source/mtia.md @@ -22,6 +22,7 @@ The MTIA backend is implemented out of the tree, only interfaces are be defined device_count init is_available + is_bf16_supported is_initialized memory_stats get_device_capability diff --git a/docs/source/xpu.md b/docs/source/xpu.md index 46d36451d4b8a..53a5fadeca357 100644 --- a/docs/source/xpu.md +++ b/docs/source/xpu.md @@ -25,6 +25,7 @@ get_stream_from_external init is_available + is_bf16_supported is_initialized set_device set_stream diff --git a/torch/cuda/__init__.py b/torch/cuda/__init__.py index 70a7269d4404e..192cb16e2a62c 100644 --- a/torch/cuda/__init__.py +++ b/torch/cuda/__init__.py @@ -500,6 +500,7 @@ def __init__(self, code: int) -> None: def check_error(res: int) -> None: + r"""Raise an error if the result of a CUDA runtime API call is not success.""" if res != _cudart.cudaError.success: raise CudaError(res) diff --git a/torch/mtia/__init__.py b/torch/mtia/__init__.py index 4c4ee32024732..14871d4259696 100644 --- a/torch/mtia/__init__.py +++ b/torch/mtia/__init__.py @@ 
-205,6 +205,7 @@ def attach_out_of_memory_observer( def is_bf16_supported(including_emulation: bool = True): + r"""Return a bool indicating if the current MTIA device supports dtype bfloat16.""" return True From 457b27f92fb38b7451b09d4c0191e4445e055d74 Mon Sep 17 00:00:00 2001 From: Anshul Sinha Date: Mon, 15 Sep 2025 18:08:53 -0700 Subject: [PATCH 305/693] [FSDP][Collectives] skipping reduce_scatter when world size is 1 (#162021) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Summary:** In its current state, FSDP collectives use CUDA synchronizations and communication ops regardless of the world size. However, now that replicate will use FSDP, there will be instances where the group size is 1 and these synchronizations and ops run needlessly. I have updated fsdp_collectives to skip reduce_scatter in the foreach_reduce API when world_size == 1. I have edited a test that uses CommDebugMode to verify that the reduce_scatter has been removed, and I also updated an affected test that uses 1-way FSDP by adjusting its CommDebugMode assertions. The relevant test commands are listed below. **Test Cases** 1. pytest test/distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_single_worldsize1 2. pytest test/distributed/_composable/test_composability/test_2d_composability.py -k test_tp_with_fsdp_offloading Pull Request resolved: https://github.com/pytorch/pytorch/pull/162021 Approved by: https://github.com/mori360 --- .../fsdp/test_fully_shard_training.py | 8 ++- .../test_2d_composability.py | 4 +- .../fsdp/_fully_shard/_fsdp_collectives.py | 50 ++++++++++++------- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/test/distributed/_composable/fsdp/test_fully_shard_training.py b/test/distributed/_composable/fsdp/test_fully_shard_training.py index e7e262cb1d6a3..3d02e053edd2a 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_training.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_training.py @@ -1490,8 +1490,8 @@ def world_size(self) -> int: @skip_if_lt_x_gpu(1) def test_train_parity_single_worldsize1(self): """ - Tests train parity with DDP for a single FSDP group when sharding - parameters on dim-0. + Tests train parity with DDP for a single FSDP group + when sharding parameters on dim-0. 
""" self.run_subtests( { @@ -1539,9 +1539,7 @@ def _shard_placement_fn(param: nn.Parameter) -> Optional[Shard]: losses.append(model(*inp).sum()) losses[-1].backward() - # Before there was 1 all-gather and 1 reduce-scatter - # Now therre is 1 reduce-scatter - self.assertEqual(comm_mode.get_total_counts(), 1) + self.assertEqual(comm_mode.get_total_counts(), 0) optim.step() self.assertEqual(losses[0], losses[1]) diff --git a/test/distributed/_composable/test_composability/test_2d_composability.py b/test/distributed/_composable/test_composability/test_2d_composability.py index 3fd84fbe9e739..925f3a647fef2 100644 --- a/test/distributed/_composable/test_composability/test_2d_composability.py +++ b/test/distributed/_composable/test_composability/test_2d_composability.py @@ -294,11 +294,11 @@ def test_tp_with_fsdp_offloading(self): with CommDebugMode() as bwd_comm_mode: loss.backward() bwd_comm_counts = bwd_comm_mode.get_comm_counts() - self.assertEqual(len(bwd_comm_counts), 2) + self.assertEqual(len(bwd_comm_counts), 1) # First MLP's input gradient does not need to be all-reduced self.assertEqual(bwd_comm_counts[funcol.all_reduce], num_mlps - 1) self.assertEqual(bwd_comm_counts[c10d_ops._allgather_base_], 0) - self.assertEqual(bwd_comm_counts[c10d_ops._reduce_scatter_base_], num_mlps) + self.assertEqual(bwd_comm_counts[c10d_ops._reduce_scatter_base_], 0) ref_loss.backward() optim.step() diff --git a/torch/distributed/fsdp/_fully_shard/_fsdp_collectives.py b/torch/distributed/fsdp/_fully_shard/_fsdp_collectives.py index 90b4b91a5cc7a..ea624cb092bdf 100644 --- a/torch/distributed/fsdp/_fully_shard/_fsdp_collectives.py +++ b/torch/distributed/fsdp/_fully_shard/_fsdp_collectives.py @@ -472,6 +472,7 @@ def foreach_reduce( ``unsharded_grads`` owns the references to the gradients computed by autograd, so clearing the list frees the gradients. """ + grad_dtypes = {grad.dtype for grad in unsharded_grads} if len(grad_dtypes) != 1: # Check this at runtime since it could be a real runtime error if e.g. 
@@ -492,14 +493,21 @@ def foreach_reduce( ) ) world_size = reduce_scatter_group.size() - for i, (fsdp_param, unsharded_grad) in enumerate(zip(fsdp_params, unsharded_grads)): - if (shard_dim := fsdp_param.fsdp_placement.dim) == 0: - continue - assert unsharded_grad.size(shard_dim) % world_size == 0, ( - f"Shard({shard_dim}) requires even sharding: {unsharded_grad.size()=} {world_size=}" - ) - chunks = torch.chunk(unsharded_grad, world_size, dim=shard_dim) - unsharded_grads[i] = torch.cat(chunks, dim=0) + device_handle = _get_device_handle(device.type) + current_stream = device_handle.current_stream() + + if world_size > 1: + for i, (fsdp_param, unsharded_grad) in enumerate( + zip(fsdp_params, unsharded_grads) + ): + if (shard_dim := fsdp_param.fsdp_placement.dim) == 0: + continue + assert unsharded_grad.size(shard_dim) % world_size == 0, ( + f"Shard({shard_dim}) requires even sharding: {unsharded_grad.size()=} {world_size=}" + ) + chunks = torch.chunk(unsharded_grad, world_size, dim=shard_dim) + unsharded_grads[i] = torch.cat(chunks, dim=0) + padded_unsharded_sizes = tuple( _get_dim0_padded_size(grad.size(), world_size) for grad in unsharded_grads ) @@ -510,14 +518,15 @@ def foreach_reduce( dtype=reduce_dtype, device=device, ) - device_handle = _get_device_handle(device.type) + foreach_reduce_scatter_copy_in(unsharded_grads, reduce_scatter_input, world_size) - current_stream = device_handle.current_stream() + # Only after the copy-in finishes can we free the gradients unsharded_grads.clear() reduce_scatter_stream.wait_stream(current_stream) all_reduce_input = None all_reduce_event = None + with device_handle.stream(reduce_scatter_stream): reduce_output = reduce_scatter_comm.allocate( (reduce_scatter_output_numel,), @@ -525,12 +534,16 @@ def foreach_reduce( device=device, ) _div_if_needed(reduce_scatter_input, predivide_factor) - reduce_scatter_comm( - output_tensor=reduce_output, - input_tensor=reduce_scatter_input, - group=reduce_scatter_group, - op=reduce_scatter_op, - ) + if world_size > 1: + reduce_scatter_comm( + output_tensor=reduce_output, + input_tensor=reduce_scatter_input, + group=reduce_scatter_group, + op=reduce_scatter_op, + ) + else: + # For single GPU, just copy the input to output (no actual reduce-scatter needed) + reduce_output.copy_(reduce_scatter_input) reduce_scatter_event = reduce_scatter_stream.record_event() post_reduce_stream = reduce_scatter_stream if all_reduce_group is not None: # HSDP @@ -551,7 +564,10 @@ def foreach_reduce( if partial_reduce_output is not None: reduce_output += partial_reduce_output post_reduce_stream = all_reduce_stream - all_reduce_stream.wait_stream(reduce_scatter_stream) + if world_size >= 1: + all_reduce_stream.wait_stream(reduce_scatter_stream) + else: + all_reduce_stream.wait_stream(current_stream) with device_handle.stream(all_reduce_stream): dist.all_reduce( reduce_output, From ddf3124b053cb88a3863b516dfbf023f1fd6fefc Mon Sep 17 00:00:00 2001 From: Anshul Sinha Date: Thu, 11 Sep 2025 16:14:44 -0700 Subject: [PATCH 306/693] [FSDP][Replicate] tests replicate input device movements (#162629) **Summary:** This test verifies that the replicate function automatically moves forward pass inputs to the correct device. **Test Cases** 1. 
pytest test/distributed/_composable/test_replicate_training.py -k test_root_move_forward_input_to_device Pull Request resolved: https://github.com/pytorch/pytorch/pull/162629 Approved by: https://github.com/mori360 --- .../_composable/test_replicate_training.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 test/distributed/_composable/test_replicate_training.py diff --git a/test/distributed/_composable/test_replicate_training.py b/test/distributed/_composable/test_replicate_training.py new file mode 100644 index 0000000000000..d9a633ad99220 --- /dev/null +++ b/test/distributed/_composable/test_replicate_training.py @@ -0,0 +1,53 @@ +# Owner(s): ["oncall: distributed"] + + +import torch +import torch.nn as nn +from torch.distributed._composable.replicate_with_fsdp import replicate +from torch.testing._internal.common_distributed import skip_if_lt_x_gpu +from torch.testing._internal.common_fsdp import FSDPTestMultiThread, get_devtype +from torch.testing._internal.common_utils import run_tests + + +c10d_ops = torch.ops.c10d +funcol = torch.ops.c10d_functional + + +device_type = torch.device(get_devtype()) + + +class TestReplicateForwardInputs(FSDPTestMultiThread): + @property + def world_size(self) -> int: + return 2 + + @skip_if_lt_x_gpu(1) + def test_root_move_forward_input_to_device(self): + device = torch.device(device_type.type, 0) + + class ParamlessModule(nn.Module): + def forward(self, x: torch.Tensor, ys: tuple[torch.Tensor, ...]): + # Check that Replicate moved the inputs to GPU, including recursing + # into the tuple data structure + assert x.device == device, f"Expects {device} but got {x.device}" + assert ys[0].device == device, ( + f"Expects {device} but got {ys[0].device}" + ) + assert ys[1].device == device, ( + f"Expects {device} but got {ys[1].device}" + ) + y = ys[0] + ys[1] + return x + y + 1 + + model = ParamlessModule().to(device) + replicate(model).to(device) + x = torch.randn((3,)) + ys = (torch.randn((3,)), torch.randn((3,))) + self.assertEqual(x.device, torch.device("cpu")) + self.assertEqual(ys[0].device, torch.device("cpu")) + self.assertEqual(ys[1].device, torch.device("cpu")) + model(x, ys) + + +if __name__ == "__main__": + run_tests() From 6702f545d880fd82700811e4a3508cdd76da9a69 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 16 Sep 2025 17:37:06 +0000 Subject: [PATCH 307/693] Restore environment after NcclUserBufferRegistrationTest (#163063) This test sets "NCCL_ALGO=NVLS" in NcclUserBufferRegistrationTest which affects tests run in the same process such as `test_on_completion_hook_*` that fail with > invalid usage (run with NCCL_DEBUG=WARN for details), NCCL version 2.26.2 > ncclInvalidUsage: This usually reflects invalid usage of NCCL library. > Last error: > Error : no algorithm/protocol available for function Broadcast with datatype ncclInt8. NCCL_ALGO was set to NVLS. 
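The fix scopes the NCCL overrides with `unittest.mock.patch.dict` so they are unwound in `tearDown` instead of leaking into later tests in the same process. The same pattern in isolation (a minimal sketch with an illustrative test class; the values are borrowed from this suite):

```python
import os
import unittest
from unittest import mock

class EnvScopedTest(unittest.TestCase):
    def setUp(self):
        # Apply the overrides only for the lifetime of this test case
        self.env_patcher = mock.patch.dict(
            os.environ, {"NCCL_ALGO": "NVLS", "NCCL_DEBUG": "INFO"}
        )
        self.env_patcher.start()

    def tearDown(self):
        # Roll back to the original environment so later tests in the same
        # process are unaffected
        self.env_patcher.stop()

    def test_env_is_scoped(self):
        self.assertEqual(os.environ["NCCL_ALGO"], "NVLS")
```

Because `tearDown` runs whether or not the test body fails, the original environment is restored either way.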
Pull Request resolved: https://github.com/pytorch/pytorch/pull/163063 Approved by: https://github.com/ezyang --- test/distributed/test_c10d_nccl.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index 0d55845228da7..f44394e3148ce 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -3145,19 +3145,24 @@ def test_invalid_nccl_blocking_wait_env(self): class NcclUserBufferRegistrationTest(MultiProcessTestCase): def setUp(self): super().setUp() - # TORCH_NCCL_BLOCKING_WAIT overrides TORCH_NCCL_ASYNC_ERROR_HANDLING hence tests - # that use TORCH_NCCL_BLOCKING_WAIT will test it as expected. - os.environ["TORCH_NCCL_ASYNC_ERROR_HANDLING"] = "1" nccl_debug_file = tempfile.NamedTemporaryFile() - os.environ["NCCL_ALGO"] = "NVLS" - os.environ["NCCL_DEBUG"] = "INFO" - os.environ["NCCL_DEBUG_SUBSYS"] = "NVLS" + nccl_env = { + # TORCH_NCCL_BLOCKING_WAIT overrides TORCH_NCCL_ASYNC_ERROR_HANDLING hence tests + # that use TORCH_NCCL_BLOCKING_WAIT will test it as expected. + "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1", + "NCCL_ALGO": "NVLS", + "NCCL_DEBUG": "INFO", + "NCCL_DEBUG_SUBSYS": "NVLS", + "NCCL_DEBUG_FILE": nccl_debug_file.name, + } if torch.cuda.nccl.version() >= (2, 24, 3): - os.environ["NCCL_DEBUG_SUBSYS"] = "REG,TUNING" - os.environ["NCCL_DEBUG_FILE"] = nccl_debug_file.name + nccl_env["NCCL_DEBUG_SUBSYS"] = "REG,TUNING" + self.env_patcher = mock.patch.dict(os.environ, nccl_env) + self.env_patcher.start() self._spawn_processes() def tearDown(self): + self.env_patcher.stop() super().tearDown() try: os.remove(self.file_name) From 559e8d1c203bda45d5e961cd022d2eea9018d73e Mon Sep 17 00:00:00 2001 From: joshuamarkovic <52184130+joshuamarkovic@users.noreply.github.com> Date: Tue, 16 Sep 2025 17:42:15 +0000 Subject: [PATCH 308/693] [doc]: Small typos (#162982) Small typo fixes Pull Request resolved: https://github.com/pytorch/pytorch/pull/162982 Approved by: https://github.com/ezyang, https://github.com/zou3519 --- aten/src/ATen/dlpack.h | 2 +- aten/src/ATen/native/ChanelShuffle.cpp | 2 +- aten/src/ATen/test/mps_test_objc_interface.mm | 2 +- c10/util/TypeCast.h | 2 +- test/fx/quantization.py | 2 +- test/inductor/test_torchinductor.py | 2 +- test/test_cuda.py | 2 +- test/test_mps.py | 2 +- torch/_dynamo/source.py | 4 ++-- torch/_dynamo/symbolic_convert.py | 4 ++-- torch/_dynamo/variables/higher_order_ops.py | 2 +- torch/_functorch/_aot_autograd/graph_capture_wrappers.py | 2 +- torch/_functorch/eager_transforms.py | 2 +- torch/_functorch/partitioners.py | 4 ++-- torch/_higher_order_ops/auto_functionalize.py | 4 ++-- torch/_inductor/codegen/cpp.py | 2 +- torch/_inductor/codegen/cpp_utils.py | 2 +- torch/_inductor/fx_passes/quantization.py | 2 +- torch/_inductor/ir.py | 2 +- torch/_inductor/kernel/flex/flex_decoding.py | 2 +- .../data_sparsifier/lightning/callbacks/README.md | 2 +- torch/csrc/api/include/torch/nn/modules/loss.h | 4 ++-- torch/csrc/autograd/profiler_python.cpp | 2 +- torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp | 2 +- .../distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp | 2 +- torch/csrc/dynamo/guards.cpp | 2 +- torch/csrc/jit/codegen/fuser/README.md | 2 +- torch/csrc/jit/codegen/onednn/README.md | 2 +- torch/csrc/profiler/collection.cpp | 2 +- torch/cuda/tunable.py | 2 +- torch/distributed/_symmetric_memory/__init__.py | 2 +- torch/distributed/checkpoint/staging.py | 2 +- torch/distributed/fsdp/_optim_utils.py | 2 +- 
torch/distributed/tensor/_random.py | 2 +- torch/distributed/tensor/experimental/_attention.py | 2 +- torch/testing/_internal/optests/make_fx.py | 2 +- torch/utils/benchmark/utils/timer.py | 2 +- torch/utils/cpp_extension.py | 2 +- 38 files changed, 43 insertions(+), 43 deletions(-) diff --git a/aten/src/ATen/dlpack.h b/aten/src/ATen/dlpack.h index 82c0668211188..891d9d7b5b503 100644 --- a/aten/src/ATen/dlpack.h +++ b/aten/src/ATen/dlpack.h @@ -269,7 +269,7 @@ typedef struct DLManagedTensor { void (*deleter)(struct DLManagedTensor * self); } DLManagedTensor; -// bit masks used in in the DLManagedTensorVersioned +// bit masks used in the DLManagedTensorVersioned /*! \brief bit mask to indicate that the tensor is read only. */ #define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL) diff --git a/aten/src/ATen/native/ChanelShuffle.cpp b/aten/src/ATen/native/ChanelShuffle.cpp index 64fdd56c0e665..d043014b3820e 100644 --- a/aten/src/ATen/native/ChanelShuffle.cpp +++ b/aten/src/ATen/native/ChanelShuffle.cpp @@ -81,7 +81,7 @@ Tensor math_channel_shuffle(const Tensor& self, int64_t groups) { // TODO: contiguous can be made to preserve the memory format // of the input. However since the above reshape clobbers h and w // it may not be safe to do that, since channels_last contiguous - // may think oc and and the last dim correspond to h,w? + // may think oc and the last dim correspond to h,w? // It is not clear, however from initial looking around it feels that // this may not be correct. // In this case channels last will likely require custom implementation diff --git a/aten/src/ATen/test/mps_test_objc_interface.mm b/aten/src/ATen/test/mps_test_objc_interface.mm index f59ca955d00d2..45811ed804802 100644 --- a/aten/src/ATen/test/mps_test_objc_interface.mm +++ b/aten/src/ATen/test/mps_test_objc_interface.mm @@ -42,7 +42,7 @@ kernel void add_arrays(device const float* inA, id customKernelLibrary = [device newLibraryWithSource: [NSString stringWithUTF8String:CUSTOM_KERNEL] options: nil error: &error]; - TORCH_CHECK(customKernelLibrary, "Failed to to create custom kernel library, error: ", error.localizedDescription.UTF8String); + TORCH_CHECK(customKernelLibrary, "Failed to create custom kernel library, error: ", error.localizedDescription.UTF8String); id customFunction = [customKernelLibrary newFunctionWithName: @"add_arrays"]; TORCH_CHECK(customFunction, "Failed to create function state object for the kernel"); diff --git a/c10/util/TypeCast.h b/c10/util/TypeCast.h index 3291fce2c41bb..d8a92c2eaa8c2 100644 --- a/c10/util/TypeCast.h +++ b/c10/util/TypeCast.h @@ -52,7 +52,7 @@ struct maybe_bool { template struct maybe_bool { C10_HOST_DEVICE static inline decltype(auto) apply(src_t src) { - // Don't use bool operator so as to to also compile for ComplexHalf. + // Don't use bool operator so as to also compile for ComplexHalf. 
return src.real() || src.imag(); } }; diff --git a/test/fx/quantization.py b/test/fx/quantization.py index 33550702ca6c7..96c100ef82ffa 100644 --- a/test/fx/quantization.py +++ b/test/fx/quantization.py @@ -341,7 +341,7 @@ def copy_recursive(node): lambda a: map_arg(a, lambda n: load_arg(n, quantized=True)), ) if r is NotImplemented: - # quantizer choose to to quantize the node take the entire match, and just copy it over + # quantizer choose to quantize the node take the entire match, and just copy it over env[node.name] = copy_recursive(node) else: quant_env[node.name] = r diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py index 6a6e3c674179b..7e7008a799d91 100644 --- a/test/inductor/test_torchinductor.py +++ b/test/inductor/test_torchinductor.py @@ -14095,7 +14095,7 @@ def add_test_failures( """ In-place modifies the given dictionary of `test_failures` to add the contents of `added_test_failures` by unioning the test_failure.suffixes, and - or-ing the the is_skip value. + or-ing the is_skip value. """ for name, new_failure in added_test_failures.items(): if name in test_failures: diff --git a/test/test_cuda.py b/test/test_cuda.py index 64253f932999c..be6670f0ab359 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -7253,7 +7253,7 @@ def test_graph_external_wait_and_record(self): # This writes allows wait_for_cpu to proceed # This is an atomic store at system scope according to this rule: - # "the scope is thread_scope_system and and it is a load or store that affects a naturally-aligned object of sizes 1, 2, 4, 8, or 16 bytes on mapped memory" # noqa: B950 + # "the scope is thread_scope_system and it is a load or store that affects a naturally-aligned object of sizes 1, 2, 4, 8, or 16 bytes on mapped memory" # noqa: B950 # https://nvidia.github.io/cccl/libcudacxx/extended_api/memory_model.html#atomicity # Note that every CPU store is implicitly system scope, diff --git a/test/test_mps.py b/test/test_mps.py index b29d24ee32386..03cc4fe8b2128 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ -12234,7 +12234,7 @@ class TestConsistency(TestCaseMPS): 'arange', 'linspace', 'special.xlog1py', - # CPU accumulates sequantially, but GPU does in in parallel + # CPU accumulates sequantially, but GPU does in parallel '_unsafe_masked_index_put_accumulate', } diff --git a/torch/_dynamo/source.py b/torch/_dynamo/source.py index c1906eeee710c..b17ccfe09daee 100644 --- a/torch/_dynamo/source.py +++ b/torch/_dynamo/source.py @@ -697,7 +697,7 @@ def is_dict_key(self) -> bool: # Used to access an item from the dictionary @dataclasses.dataclass(frozen=True) class DictGetItemSource(ChainedSource): - # Key to access in the dictionary. It can be one of the the following types + # Key to access in the dictionary. It can be one of the following types # 1) ConstDictKeySource # 2) constant - like string, integer index: Any @@ -734,7 +734,7 @@ def name(self) -> str: # torch.compile does not run the overridden __getitem__ method @dataclasses.dataclass(frozen=True) class DictSubclassGetItemSource(ChainedSource): - # Key to access in the dictionary. It can be one of the the following types + # Key to access in the dictionary. 
It can be one of the following types # 1) ConstDictKeySource # 2) constant - like string, integer index: Any diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py index beebea05a0e3e..3df339b9418f8 100644 --- a/torch/_dynamo/symbolic_convert.py +++ b/torch/_dynamo/symbolic_convert.py @@ -1055,7 +1055,7 @@ class ExceptionStack: """ # Exception handling in CPython is a bit confusing and some of the bytecode - # have a slightly different behavior than what is is documented. While reading + # have a slightly different behavior than what is documented. While reading # the documentation, is important to notice that the terms "current exception" # and "stack" sometimes refers to a C variable with the same name and the # exception stack, respectively. @@ -4384,7 +4384,7 @@ def __init__( # because we dont mutate them in transform_code_object (those # instructions are for the top most Instruction translator). Also, we # have to be careful about not using _cached_cleaned_instructions here - # because that function is global, while we want the the cache to be + # because that function is global, while we want the cache to be # alive only during a compmilation. tracing_ctx = parent.output.tracing_context instructions = None diff --git a/torch/_dynamo/variables/higher_order_ops.py b/torch/_dynamo/variables/higher_order_ops.py index 5ac883c7d3932..7135bba36dfa9 100644 --- a/torch/_dynamo/variables/higher_order_ops.py +++ b/torch/_dynamo/variables/higher_order_ops.py @@ -418,7 +418,7 @@ def unspecialize_carried_inputs(tx, carry) -> VariableTracker: source_target=self.value, # NOTE [why we cannot use "automatic" for while_loop]: # The reason is that we want to enforce - # the ordering of inputs and outputs to be consistent and the the ordering + # the ordering of inputs and outputs to be consistent and the ordering # of cond_fn and body_fn to the consistent. # e.g. suppose we use "automatic" and we have: # diff --git a/torch/_functorch/_aot_autograd/graph_capture_wrappers.py b/torch/_functorch/_aot_autograd/graph_capture_wrappers.py index 0a2dc525cc070..b2d96620b4bc6 100644 --- a/torch/_functorch/_aot_autograd/graph_capture_wrappers.py +++ b/torch/_functorch/_aot_autograd/graph_capture_wrappers.py @@ -806,7 +806,7 @@ def _post_forward(primals): # Here, we perform extra checks for primals that were mutated in the **backward** # We're doing the checks here instead of doing them with the rest of the input mutation handling because: # - We need to detect inputs that were mutated in the backward **separately** from mutations that happened - # during the forward, because the handling is different: some input mutations from the the forward + # during the forward, because the handling is different: some input mutations from the forward # can be only handled in a fw-only runtime epilogue, and in theory if we wanted to handle those same # types of mutations in the backward we would need a bw-only runtime epilogue. # - We could in theory have our analysis pass differentiate mutations in the fw from mutations in diff --git a/torch/_functorch/eager_transforms.py b/torch/_functorch/eager_transforms.py index d99995b86f2ba..828f5e8decc6e 100644 --- a/torch/_functorch/eager_transforms.py +++ b/torch/_functorch/eager_transforms.py @@ -1593,7 +1593,7 @@ def forward(self, a_1): If you call `functionalize(f)` on a function that takes views / mutations of non-local state, functionalization will simply no-op and pass the view/mutation calls directly to the backend. 
- One way to work around this is is to ensure that any non-local state creation + One way to work around this is to ensure that any non-local state creation is wrapped into a larger function, which you then call functionalize on. (3) `resize_()` has some limitations: functionalize will only work on programs that use resize_()` as long as the tensor being resized is not a view. diff --git a/torch/_functorch/partitioners.py b/torch/_functorch/partitioners.py index 9030cfc3c17ca..457cdb0867b6d 100644 --- a/torch/_functorch/partitioners.py +++ b/torch/_functorch/partitioners.py @@ -1792,7 +1792,7 @@ def ban_recomputation_if_allowed(node): # If someone saves a input for backward as-is and backward # returns that tensor as-is as a grad input, then the node x would # be both a required_bw_node and an input. In this case we - # (1) connect x_in to to the source, (2) x_out to the sink, and + # (1) connect x_in to the source, (2) x_out to the sink, and # (3) assign the proper weight to the x_in-x_out edge, so that # x would be part of cut nodes. A case where this happens is if # NestedTensor saves a offset tensor as part of the singleton int @@ -2535,7 +2535,7 @@ def has_same_nodes(joint_graph): # proxy to check if the graph is the same across different GPUs. # We only consider the name and order of nodes. A more robust way # would be to check the hash of the whole graph (disregarding input shapes), - # this is is a reasonable first-order approximation. + # this is a reasonable first-order approximation. node_str = "/".join(x.name for x in joint_graph.nodes) inputs = hashlib.sha256(node_str.encode("utf-8")).hexdigest() all_inputs = [None for _ in range(torch.distributed.get_world_size())] diff --git a/torch/_higher_order_ops/auto_functionalize.py b/torch/_higher_order_ops/auto_functionalize.py index d5aa0d09c8b18..d8374c356ab2e 100644 --- a/torch/_higher_order_ops/auto_functionalize.py +++ b/torch/_higher_order_ops/auto_functionalize.py @@ -508,7 +508,7 @@ def do_auto_functionalize( normalized_kwargs[arg.name] = kwargs[arg.name] elif idx < len(args): # if its out of bounds we don't need to do anything - # as it means the the optional arg was passed with its default + # as it means the optional arg was passed with its default # value normalized_kwargs[arg.name] = args[idx] else: @@ -625,7 +625,7 @@ def _functionalize_callable(arg: Any): normalized_kwargs[arg.name] = kwargs[arg.name] elif idx < len(args): # if its out of bounds we don't need to do anything - # as it means the the optional arg was passed with its default + # as it means the optional arg was passed with its default # value normalized_kwargs[arg.name] = args[idx] else: diff --git a/torch/_inductor/codegen/cpp.py b/torch/_inductor/codegen/cpp.py index b339ee75262d2..4754a57fa73cc 100644 --- a/torch/_inductor/codegen/cpp.py +++ b/torch/_inductor/codegen/cpp.py @@ -5408,7 +5408,7 @@ def define_kernel(self, src_code, nodes, kernel_args=None): src_code = src_code.replace("#pragma CMT", "//") # Get the lines in the source code representing the function definition, - # excluding the the first line including cpp_prefix.h. + # excluding the first line including cpp_prefix.h. first_char = src_code.rfind('extern "C"') last_char = src_code.find(")", first_char) if _IS_WINDOWS: diff --git a/torch/_inductor/codegen/cpp_utils.py b/torch/_inductor/codegen/cpp_utils.py index 929c227039463..a2d9878f22235 100644 --- a/torch/_inductor/codegen/cpp_utils.py +++ b/torch/_inductor/codegen/cpp_utils.py @@ -429,7 +429,7 @@ def localize_nodes( `local_buf`. 
This helps the fused loops to work on smaller-sized local buffers for better data locality. - The the data access of `local_buf` is assumed to be contiguous with the + The data access of `local_buf` is assumed to be contiguous with the same order as the `global_buf`. """ assert len(nodes) > 0 diff --git a/torch/_inductor/fx_passes/quantization.py b/torch/_inductor/fx_passes/quantization.py index 01f62bdf608ce..0ba2a1f16458f 100644 --- a/torch/_inductor/fx_passes/quantization.py +++ b/torch/_inductor/fx_passes/quantization.py @@ -3876,7 +3876,7 @@ def quant_lift_up(graph_module: torch.fx.GraphModule): ADD SOFTMAX - We want to lift up the the quant nodes from matmul before view like nodes + We want to lift up the quant nodes from matmul before view like nodes as the output of Linear node. DQ diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index 6cab868b916b3..9f105ebcf0534 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -5926,7 +5926,7 @@ def require_strides( if is_storage_and_layout(x): if isinstance(x.get_layout(), FlexibleLayout): if order: - # If the the FlexibleLayout already has the size and stride in the required order, + # If the FlexibleLayout already has the size and stride in the required order, # freeze it to a FixedLayout by using its current size and stride. # The behavior of using its current size and stride or the given order can be different # if the size and stride has ambiguilty, for example for a 4D input where the iC = 1: diff --git a/torch/_inductor/kernel/flex/flex_decoding.py b/torch/_inductor/kernel/flex/flex_decoding.py index 7cee221189046..0f7d58402551a 100644 --- a/torch/_inductor/kernel/flex/flex_decoding.py +++ b/torch/_inductor/kernel/flex/flex_decoding.py @@ -34,7 +34,7 @@ def _use_flex_decoding(query, kv_indices, value, kernel_options, enable_gqa) -> bool: """Decide which kernel to use, return true if use flex decoding kernel. Note: - Since the number of splits is calculated based of the the number of batch and head dims + Since the number of splits is calculated based of the number of batch and head dims we need to ensure that the batch and head dims are statically known. Otherwise we just use the main flex_attention kernel. """ diff --git a/torch/ao/pruning/_experimental/data_sparsifier/lightning/callbacks/README.md b/torch/ao/pruning/_experimental/data_sparsifier/lightning/callbacks/README.md index f36342edf0b4a..7e33e05341602 100644 --- a/torch/ao/pruning/_experimental/data_sparsifier/lightning/callbacks/README.md +++ b/torch/ao/pruning/_experimental/data_sparsifier/lightning/callbacks/README.md @@ -3,7 +3,7 @@ **These are callback scripts for lightning and does not introduce pytorch lightning dependency on PyTorch.** ## Introduction -Callbacks for PytorchLightning that specifies on when and how to to sparsify the data weights of the model. +Callbacks for PytorchLightning that specifies on when and how to sparsify the data weights of the model. ## Types of Data Sparsity Callbacks There are 2 types of data sparsity callbacks diff --git a/torch/csrc/api/include/torch/nn/modules/loss.h b/torch/csrc/api/include/torch/nn/modules/loss.h index 52be4f612b59f..76b35621c37f8 100644 --- a/torch/csrc/api/include/torch/nn/modules/loss.h +++ b/torch/csrc/api/include/torch/nn/modules/loss.h @@ -703,7 +703,7 @@ struct TORCH_API NLLLossImpl : public Cloneable { /// The options with which this `Module` was constructed. NLLLossOptions options; - /// A manual rescaling weight given to to each class. 
+ /// A manual rescaling weight given to each class. Tensor weight; }; @@ -743,7 +743,7 @@ struct TORCH_API CrossEntropyLossImpl : public Cloneable { /// The options with which this `Module` was constructed. CrossEntropyLossOptions options; - /// A manual rescaling weight given to to each class. + /// A manual rescaling weight given to each class. Tensor weight; }; diff --git a/torch/csrc/autograd/profiler_python.cpp b/torch/csrc/autograd/profiler_python.cpp index 78a0c6eeec7ac..0e895312cbd12 100644 --- a/torch/csrc/autograd/profiler_python.cpp +++ b/torch/csrc/autograd/profiler_python.cpp @@ -167,7 +167,7 @@ class CallTypeHelper final { // // During post processing we: // 1) Determine the type represented by a TraceKey by checking which -// sub-cache it appears in in the thread local cache. +// sub-cache it appears in the thread local cache. // 2) Look up the pair of CallKeys from the thread local cache. // 3) Look up the expanded values of each CallKey from the global value cache. // diff --git a/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp b/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp index f7a3a28caceb3..e3ac4c09a9b01 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp @@ -1201,7 +1201,7 @@ class TORCH_API ProcessGroupNCCL : public Backend { // Returns the global rank of the device. This function assumes that users // always create a default global process group(PG) which includes all // devices. It is called in the constructor of ProcessGroupNCCL, so it always - // return the rank_ of the the very first PG created, aka, default global PG. + // return the rank_ of the very first PG created, aka, default global PG. const int& globalRank() const; const c10::intrusive_ptr& globalStore() const; diff --git a/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp b/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp index 225304faca652..b705e7099d12c 100644 --- a/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp +++ b/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.cpp @@ -123,7 +123,7 @@ void IpcChannel::send_fd(int dst_pid, int fd) { msg.msg_controllen = 0; } - // Finally send the the message + // Finally send the message TORCH_CHECK( sendmsg(socket_, &msg, 0) > 0, "Failed to send fd: ", diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index b1c631f13f560..f55b9da262bef 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -2336,7 +2336,7 @@ class DUAL_LEVEL_MATCH : public LeafGuard { * Relational guards compare more than one value. We implement Relational * guards by capturing some state in the guard object. For example for tensor * aliasing guards - tensor X is not tensor Y - we construct one leaf guard - * and and install it at as a leaf of two guard managers (one for X and + * and install it at as a leaf of two guard managers (one for X and * another for Y). Therefore, this guard is run twice. In the first * invocation, it saves the first value (state) and returns True. 
In the * second invocation, it compares the saved value with the new value and diff --git a/torch/csrc/jit/codegen/fuser/README.md b/torch/csrc/jit/codegen/fuser/README.md index e115f999188e6..3fdc6f371a5c9 100644 --- a/torch/csrc/jit/codegen/fuser/README.md +++ b/torch/csrc/jit/codegen/fuser/README.md @@ -7,7 +7,7 @@ The fuser accepts subgraphs wrapped in "fusion nodes" and tries to execute them The fuser is designed hierarchically with device-independent logic eventually deferring to device-specific logic and implementation. The device-specific code is (mostly) found in each devices' subdirectory. The device-independent logic has six components: * The Interface (interface.h/cpp) has functions to register and run fusions, interrogate fusion functionality, and perform debugging. -* The Compiler (compiler.h/cpp) performs "upfront" and "runtime" compilation. When fusions are registered, upfront compilation produces fallback code and and performs some shape inference. When a fusion is run, runtime compilation invokes code generation and the device-specific compilation logic. +* The Compiler (compiler.h/cpp) performs "upfront" and "runtime" compilation. When fusions are registered, upfront compilation produces fallback code and performs some shape inference. When a fusion is run, runtime compilation invokes code generation and the device-specific compilation logic. * The Code Generator (codegen.h/cpp) produces the string to be compiled on the device. * The Executor (executor.h/cpp) runs requested fusions. It performs shape inference, expands tensors as necessary, determines the device to run on, acquires a cached compiled kernel or requests the Compiler produce a new one, invokes device-specific code to launch the kernel and updates the stack. * The Fallback (fallback.h/cpp) runs subgraphs that can't be fused because shape inference didn't determine a common tensor size or the device the tensors are on doesn't support fusion. diff --git a/torch/csrc/jit/codegen/onednn/README.md b/torch/csrc/jit/codegen/onednn/README.md index fb309abc3bc0e..d8b81bbb79818 100644 --- a/torch/csrc/jit/codegen/onednn/README.md +++ b/torch/csrc/jit/codegen/onednn/README.md @@ -81,7 +81,7 @@ cmake/Modules/FindMKLDNN.cmake cmake/Dependencies.cmake ``` -To map another op to oneDNN Graph, you should add an entry for it in in createOperator in torch/csrc/jit/codegen/onednn/graph_helper.cpp. +To map another op to oneDNN Graph, you should add an entry for it in createOperator in torch/csrc/jit/codegen/onednn/graph_helper.cpp. If it has an inplace variant, you should add it in the lambda being passed to RemoveTensorMutation in torch/csrc/jit/codegen/onednn/interface.cpp. You might also want to add it to canFuseNode in torch/csrc/jit/codegen/onednn/register_interface.cpp. diff --git a/torch/csrc/profiler/collection.cpp b/torch/csrc/profiler/collection.cpp index c7f759cd077c9..4d6a538c07724 100644 --- a/torch/csrc/profiler/collection.cpp +++ b/torch/csrc/profiler/collection.cpp @@ -172,7 +172,7 @@ bool InputOutputEncoder::isSupportedScalarList( return true; } -// This function returns a lambda which is is a custom-iterator-like getter. +// This function returns a lambda which is a custom-iterator-like getter. // Each invocation of the lambda returns input values for one op. // // io_type is used to filter the ivalues between 'Shapes' and 'Concrete Args'. 
diff --git a/torch/cuda/tunable.py b/torch/cuda/tunable.py index d1ac7fad7480b..a1fbd4fdddc27 100644 --- a/torch/cuda/tunable.py +++ b/torch/cuda/tunable.py @@ -600,7 +600,7 @@ def _process_single_offline_gemm(untuned_gemm_line: str, gpu_id: int) -> None: assert count in [6, 7] untuned_gemm_temp = untuned_gemm[0].split("_") # dtypeC = might not be FP8 type, keep track - # of the the number of underscores + # of the number of underscores op_sig = untuned_gemm_temp[0] data_typeA = untuned_gemm_temp[1] + "_" + untuned_gemm_temp[2] data_typeB = untuned_gemm_temp[3] + "_" + untuned_gemm_temp[4] diff --git a/torch/distributed/_symmetric_memory/__init__.py b/torch/distributed/_symmetric_memory/__init__.py index 8154cd9809139..77e05cf9b1622 100644 --- a/torch/distributed/_symmetric_memory/__init__.py +++ b/torch/distributed/_symmetric_memory/__init__.py @@ -1289,7 +1289,7 @@ def _fused_scaled_matmul_reduce_scatter_impl( def chunk_producer(rank: int, out: torch.Tensor) -> None: mm_out_op(A_shards[rank], B, scale_a=A_scale_shards[rank], **kwargs, out=out) - # Stacked partials will be the 2D outputs of the the pipelined scaled mm, and will + # Stacked partials will be the 2D outputs of the pipelined scaled mm, and will # have the shape (A_with_scatter_dim_0_tensor.shape[0], B.shape[1]) to align with the formula: # (a*b,c) @ (c,d) = (a*b,d) stacked_partials = A_with_scatter_dim_0.new_empty( diff --git a/torch/distributed/checkpoint/staging.py b/torch/distributed/checkpoint/staging.py index e7acf4975173c..c463b66ddd3f8 100644 --- a/torch/distributed/checkpoint/staging.py +++ b/torch/distributed/checkpoint/staging.py @@ -57,7 +57,7 @@ class AsyncStager(Protocol): 3. If AsyncStager.should_synchronize_after_execute is True, this method will be called immediately after the serialization thread starts and before returning from dcp.async_save. If this is set to False, - the assumption is the user has defined a custom synchronization point for the the purpose of further + the assumption is the user has defined a custom synchronization point for the purpose of further optimizing save latency in the training loop (for example, by overlapping staging with the forward/backward pass), and it is the respondsibility of the user to call `AsyncStager.synchronize_staging` at the appropriate time. diff --git a/torch/distributed/fsdp/_optim_utils.py b/torch/distributed/fsdp/_optim_utils.py index 671995671c75b..5fb52c7c281cc 100644 --- a/torch/distributed/fsdp/_optim_utils.py +++ b/torch/distributed/fsdp/_optim_utils.py @@ -426,7 +426,7 @@ def _flatten_optim_state_dict( Note that ``_flatten_tensor_optim_state`` does not need ``optim`` to flatten/shard the state. However, NamedOptimizer and KeyedOptimizer require all the states even if the corresponding parameters are empty. To this end, - ``optim`` will be used to to get the initial state of the empty parameters. + ``optim`` will be used to get the initial state of the empty parameters. ``optim`` should only be non-None if the ``optim` is KeyedOptimizer or NamedOptimizer. diff --git a/torch/distributed/tensor/_random.py b/torch/distributed/tensor/_random.py index dc3a1fb10e4b3..68a3fe3f329a3 100644 --- a/torch/distributed/tensor/_random.py +++ b/torch/distributed/tensor/_random.py @@ -82,7 +82,7 @@ def manual_seed(seed: int, device_mesh: DeviceMesh) -> None: # "DTensor manual_seed() is deprecated, since DTensor no longer maintains a separate copy of generator state. 
" # "Use `torch.manual_seed` instead" # ) - # Note: we still need to ensure setting `run_state_sync=False` to support the the pp case + # Note: we still need to ensure setting `run_state_sync=False` to support the pp case # instantiate a RNG tracker if haven't. By default DTensor uses an # OffsetBasedRNGTracker to perform random operators. diff --git a/torch/distributed/tensor/experimental/_attention.py b/torch/distributed/tensor/experimental/_attention.py index 6cd06727cd2b2..891bfe91e7f75 100644 --- a/torch/distributed/tensor/experimental/_attention.py +++ b/torch/distributed/tensor/experimental/_attention.py @@ -457,7 +457,7 @@ def _templated_ring_attention( else: # Round-robin load balancing case, and i > rank. # We need to do SPDA with only the second half of the q, and update - # only the the second part of logsumexp. So partial is True. + # only the second part of logsumexp. So partial is True. # Note that q, k, v, each contains two chunks. q, k, v, partial = query.chunk(2, dim=2)[1], key, value, True diff --git a/torch/testing/_internal/optests/make_fx.py b/torch/testing/_internal/optests/make_fx.py index 83cefd18bc059..970a0be1b3695 100644 --- a/torch/testing/_internal/optests/make_fx.py +++ b/torch/testing/_internal/optests/make_fx.py @@ -55,7 +55,7 @@ def run(f, *args, **kwargs): # If any argument is a torch.Size(), maybe get dynamic shapes for it by: # - Create a temporary Tensor whose size is the torch.Size() we want. Note that # we use an expanded Tensor as we cannot pass "meta" Tensors to make_fx. -# - Pass it to make_fx such that it is is converted to a proxy Tensor +# - Pass it to make_fx such that it is converted to a proxy Tensor # - Unpack the size in the wrapper to get a torch.Size with dynamic shapes (in # symbolic mode, a no-op otherwise) def handle_sizes_for_dynamic_shapes(func, args, kwargs): diff --git a/torch/utils/benchmark/utils/timer.py b/torch/utils/benchmark/utils/timer.py index 1889f6756e70f..377fc1221dbe4 100644 --- a/torch/utils/benchmark/utils/timer.py +++ b/torch/utils/benchmark/utils/timer.py @@ -484,7 +484,7 @@ def collect_callgrind( the fact that a small number of iterations is generally sufficient to obtain good measurements. - In order to to use this method `valgrind`, `callgrind_control`, and + In order to use this method `valgrind`, `callgrind_control`, and `callgrind_annotate` must be installed. Because there is a process boundary between the caller (this process) diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py index 43caf5782fee5..2309429594ee8 100644 --- a/torch/utils/cpp_extension.py +++ b/torch/utils/cpp_extension.py @@ -206,7 +206,7 @@ def _join_sycl_home(*paths) -> str: "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" "Your compiler (%s) is not compatible with the compiler Pytorch was" "built with for this platform, which is %s on %s. Please" - "use %s to to compile your extension. Alternatively, you may" + "use %s to compile your extension. Alternatively, you may" "compile PyTorch from source using %s, and then you can also use" "%s to compile your extension." 
"See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help" From 6db37d72069c39c01092ff2bb095773303200357 Mon Sep 17 00:00:00 2001 From: Isalia20 Date: Tue, 16 Sep 2025 17:48:02 +0000 Subject: [PATCH 309/693] [MPS] zeros like, narrow and enable tests (#163011) zeros like, narrow and enable tests for SparseMPS Pull Request resolved: https://github.com/pytorch/pytorch/pull/163011 Approved by: https://github.com/malfet --- aten/src/ATen/native/native_functions.yaml | 4 ++-- test/test_sparse.py | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml index 45c69690c9563..238df68acd1a0 100644 --- a/aten/src/ATen/native/native_functions.yaml +++ b/aten/src/ATen/native/native_functions.yaml @@ -4372,7 +4372,7 @@ variants: function, method dispatch: CPU: narrow_copy_dense_cpu - SparseCPU, SparseCUDA: narrow_copy_sparse + SparseCPU, SparseCUDA, SparseMPS: narrow_copy_sparse CompositeExplicitAutogradNonFunctional: narrow_copy_dense_symint tags: view_copy @@ -6660,7 +6660,7 @@ - func: zeros.out(SymInt[] size, *, Tensor(a!) out) -> Tensor(a!) dispatch: CompositeExplicitAutograd: zeros_out - SparseCPU, SparseCUDA, SparseMeta: zeros_sparse_out + SparseCPU, SparseCUDA, SparseMPS, SparseMeta: zeros_sparse_out - func: zeros_like(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, MemoryFormat? memory_format=None) -> Tensor dispatch: diff --git a/test/test_sparse.py b/test/test_sparse.py index 9e7c797d38087..bd49998e0951d 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -479,8 +479,8 @@ def func(indices, values, shape, is_coalesced): "cannot set is_coalesced to true if indices correspond to uncoalesced COO tensor"): torch.autograd.gradcheck(func, (t._indices(), t._values().requires_grad_(True), shape, True)) - @expectedFailureMPS @dtypes(*floating_and_complex_types_and(torch.float16, torch.bfloat16)) + @dtypesIfMPS(*all_mps_types()) @unittest.skipIf(TEST_WITH_CROSSREF, "generator unsupported triggers assertion error") @gradcheck_semantics() def test_to_dense_with_gradcheck(self, device, dtype, gradcheck): @@ -505,7 +505,8 @@ def fn(x): x.requires_grad_(True) gradcheck(fn, (x,)) - for value_type in [torch.double, torch.cdouble]: + values_types = [torch.double, torch.cdouble] if device != "mps:0" else [torch.float32, torch.complex64] + for value_type in values_types: i = self.index_tensor([ [0, 1, 2, 2], [0, 0, 0, 3], @@ -859,8 +860,8 @@ def test_shape(sparse_dims, nnz, with_size): test_shape(3, 0, [0, 0, 100, 5, 5, 5, 0]) @coalescedonoff - @expectedFailureMPS @dtypes(torch.double, torch.cdouble, torch.bfloat16) + @dtypesIfMPS(torch.float32, torch.complex64, torch.bfloat16) @precisionOverride({torch.bfloat16: 2e-2}) def test_Sparse_to_Sparse_copy_(self, device, dtype, coalesced): # This is for testing torch.copy_(SparseTensor, SparseTensor) @@ -883,7 +884,7 @@ def test_Sparse_to_Sparse_copy_(self, device, dtype, coalesced): x1.copy_(x2) self.assertEqual(x1_dtype, x1.dtype) - x2 = x2.to(torch.float64) + x2 = x2.to(torch.float64) if device != "mps:0" else x2.to(torch.float32) x1_dtype = x1.dtype x1.copy_(x2) self.assertEqual(x1_dtype, x1.dtype) @@ -2275,8 +2276,8 @@ def test_shape(i_shapes, v_shapes, shape, nnzs): test_shape([2, 3, 4], [0, 4, 5, 6], [2, 3, 0], [9, 12]) @coalescedonoff - @expectedFailureMPS @dtypes(torch.double, torch.cdouble) + @dtypesIfMPS(torch.float32, torch.complex64) def test_zeros_like(self, 
device, dtype, coalesced): def _test_zeros_like(nnzs, template_shape_i, template_shape_v=None): template_shape_v = template_shape_v or [] @@ -2416,8 +2417,8 @@ def _all_narrow_combs(self, shape): yield [dim, start, length] @coalescedonoff - @expectedFailureMPS @dtypes(torch.double, torch.cdouble) + @dtypesIfMPS(torch.float32, torch.complex64) def test_narrow(self, device, dtype, coalesced): shape = [3, 3, 4, 2] input, _, _ = self._gen_sparse(4, 19, shape, dtype, device, coalesced) @@ -3278,8 +3279,8 @@ def test_change_tensor_metadata(self, device, dtype): self.assertEqual(list(t.coalesce().values().size()), [1, 3]) @coalescedonoff - @expectedFailureMPS @dtypes(torch.double) + @dtypesIfMPS(torch.float32) def test_pickle(self, device, dtype, coalesced): import pickle From 0819de412dafbe653399a3d79e5f5028b5d5039e Mon Sep 17 00:00:00 2001 From: "Yu, Guangye" Date: Mon, 15 Sep 2025 13:07:09 +0000 Subject: [PATCH 310/693] Add a new API torch.xpu.can_device_access_peer for Intel GPU (#162705) # Motivation Aligned with other backends, this PR introduces an new API `torch.xpu.can_device_access_peer`, which is used in vllm distributed [scenarios](https://github.com/vllm-project/vllm/blob/2048c4e37909a42847cd2f51c7e0cf92e3b63466/vllm/distributed/device_communicators/custom_all_reduce.py#L37) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162705 Approved by: https://github.com/EikanWang, https://github.com/ezyang --- aten/src/ATen/xpu/XPUContext.cpp | 19 +++++++++++++++++++ aten/src/ATen/xpu/XPUContext.h | 2 ++ docs/source/xpu.md | 1 + test/test_xpu.py | 10 ++++++++++ torch/_C/__init__.pyi.in | 1 + torch/csrc/xpu/Module.cpp | 5 +++++ torch/xpu/__init__.py | 17 +++++++++++++++++ 7 files changed, 55 insertions(+) diff --git a/aten/src/ATen/xpu/XPUContext.cpp b/aten/src/ATen/xpu/XPUContext.cpp index 2157e34648b82..e956ec9a16599 100644 --- a/aten/src/ATen/xpu/XPUContext.cpp +++ b/aten/src/ATen/xpu/XPUContext.cpp @@ -76,4 +76,23 @@ int32_t getGlobalIdxFromDevice(DeviceIndex device) { return device_global_idxs[device]; } +// Check if a device can access the memory of a peer device directly. 
+bool canDeviceAccessPeer(DeviceIndex device, DeviceIndex peer) { + if (device == -1) { + device = c10::xpu::current_device(); + } + if (peer == -1) { + peer = c10::xpu::current_device(); + } + check_device_index(device); + check_device_index(peer); + // A device can always access itself + if (device == peer) { + return true; + } + return c10::xpu::get_raw_device(device).ext_oneapi_can_access_peer( + c10::xpu::get_raw_device(peer), + sycl::ext::oneapi::peer_access::access_supported); +} + } // namespace at::xpu diff --git a/aten/src/ATen/xpu/XPUContext.h b/aten/src/ATen/xpu/XPUContext.h index fb8fbe9c0aa42..a473f317ca3d1 100644 --- a/aten/src/ATen/xpu/XPUContext.h +++ b/aten/src/ATen/xpu/XPUContext.h @@ -17,4 +17,6 @@ TORCH_XPU_API DeviceProp* getDeviceProperties(DeviceIndex device); TORCH_XPU_API int32_t getGlobalIdxFromDevice(DeviceIndex device); +TORCH_XPU_API bool canDeviceAccessPeer(DeviceIndex device, DeviceIndex peer); + } // namespace at::xpu diff --git a/docs/source/xpu.md b/docs/source/xpu.md index 53a5fadeca357..1496a7f82c587 100644 --- a/docs/source/xpu.md +++ b/docs/source/xpu.md @@ -12,6 +12,7 @@ :nosignatures: StreamContext + can_device_access_peer current_device current_stream device diff --git a/test/test_xpu.py b/test/test_xpu.py index 04d045b00d8bc..3474e4031ef23 100644 --- a/test/test_xpu.py +++ b/test/test_xpu.py @@ -585,6 +585,16 @@ def test_get_arch_list(self): for arch in arch_list: self.assertTrue(arch in flags) + @unittest.skipIf(not TEST_MULTIXPU, "only one GPU detected") + def test_can_device_access_peer(self): + device_count = torch.xpu.device_count() + for device in range(device_count): + for peer in range(device_count): + self.assertEqual( + torch.xpu.can_device_access_peer(device, peer), + torch.xpu.can_device_access_peer(peer, device), + ) + def test_torch_version_xpu(self): self.assertEqual(len(torch.version.xpu), 8) compiler_version = int(torch.version.xpu) diff --git a/torch/_C/__init__.pyi.in b/torch/_C/__init__.pyi.in index e55137c3d2bfd..3b183c8af835d 100644 --- a/torch/_C/__init__.pyi.in +++ b/torch/_C/__init__.pyi.in @@ -2369,6 +2369,7 @@ def _xpu_memoryStats(device: _int) -> dict[str, Any]: ... def _xpu_resetAccumulatedMemoryStats(device: _int) -> None: ... def _xpu_resetPeakMemoryStats(device: _int) -> None: ... def _xpu_getMemoryInfo(device: _int) -> tuple[_int, _int]: ... +def _xpu_canDeviceAccessPeer(device: _int, peer: _int) -> _bool: ... class _XpuDeviceProperties: name: str diff --git a/torch/csrc/xpu/Module.cpp b/torch/csrc/xpu/Module.cpp index d49fc0539a087..8f1aead1900c6 100644 --- a/torch/csrc/xpu/Module.cpp +++ b/torch/csrc/xpu/Module.cpp @@ -415,6 +415,11 @@ static void initXpuMethodBindings(PyObject* module) { return std::make_tuple( stream.id(), stream.device_index(), stream.device_type()); }); + m.def( + "_xpu_canDeviceAccessPeer", + [](c10::DeviceIndex device, c10::DeviceIndex peer) { + return at::xpu::canDeviceAccessPeer(device, peer); + }); } // Callback for python part. Used for additional initialization of python diff --git a/torch/xpu/__init__.py b/torch/xpu/__init__.py index 79aae38a31685..6e15bf4380e3e 100644 --- a/torch/xpu/__init__.py +++ b/torch/xpu/__init__.py @@ -280,6 +280,22 @@ def _get_device(device: Union[int, str, torch.device]) -> torch.device: return device +def can_device_access_peer(device: _device_t, peer: _device_t) -> bool: + r"""Query whether a device can access a peer device's memory. + + Args: + device (torch.device or int or str): selected device. 
+ peer (torch.device or int or str): peer device to query access to. + + Returns: + bool: ``True`` if ``device`` can access ``peer``, ``False`` otherwise. + """ + _lazy_init() + device = _get_device_index(device, optional=True) + peer = _get_device_index(peer, optional=True) + return torch._C._xpu_canDeviceAccessPeer(device, peer) + + class StreamContext: r"""Context-manager that selects a given stream. @@ -518,6 +534,7 @@ def _get_rng_state_offset(device: Union[int, str, torch.device] = "xpu") -> int: "Event", "Stream", "StreamContext", + "can_device_access_peer", "current_device", "current_stream", "default_generators", From bb3f3cc65e259d8075223b43e26c8b7f7c55d7c6 Mon Sep 17 00:00:00 2001 From: Zhengxu Chen Date: Tue, 16 Sep 2025 18:27:45 +0000 Subject: [PATCH 311/693] [precompile] Store traced file information with CompileArtifacts. (#162983) Summary: Add some metadata to CompileArtifacts, so that it contains the source code information about the original code while they are being traced. For now, we will not provide a verification method to end user and instead we just provide which files are inlined. It's up to user to verify the content from these files are not changed (because it's optional for many users to validate source code changes anyway in aot precompile) Test Plan: buck run @mode/opt test/dynamo:test_dynamo -- -k test_file_change buck run @mode/opt test/dynamo:test_dynamo -- -k test_aot_compile_source_info Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/162983 Approved by: https://github.com/yushangdi --- test/dynamo/test_aot_compile.py | 25 +++++++++++++++ torch/_dynamo/aot_compile.py | 19 +++++++++-- torch/_dynamo/output_graph.py | 1 + torch/_dynamo/package.py | 57 +++++++++++++++++++++------------ 4 files changed, 79 insertions(+), 23 deletions(-) diff --git a/test/dynamo/test_aot_compile.py b/test/dynamo/test_aot_compile.py index 194bd2093c1f2..fb87aca71013a 100644 --- a/test/dynamo/test_aot_compile.py +++ b/test/dynamo/test_aot_compile.py @@ -19,6 +19,9 @@ from torch.testing._internal.common_utils import instantiate_parametrized_tests +MY_LAMBDA = lambda x: x + 1 # noqa: E731 + + class CustomCompiledFunction(torch._dynamo.aot_compile.SerializableCallable): def __init__(self, gm: torch.fx.GraphModule, example_inputs: list[torch.Tensor]): self.gm = gm @@ -143,6 +146,28 @@ def backend(gm, example_inputs): actual = compiled_fn(*example_inputs) self.assertEqual(expected, actual) + def test_aot_compile_source_info(self): + from torch._dynamo.package import SourceInfo + + def fn(x, y): + return MY_LAMBDA(x) + y + + compiled_fn = torch.compile(fn, fullgraph=True).aot_compile( + ((torch.randn(3, 4), torch.randn(3, 4)), {}) + ) + + source_info = compiled_fn.source_info() + self.assertIsInstance(source_info, SourceInfo) + self.assertEqual(len(source_info.inlined_sources), 2) + self.assertEqual(next(iter(source_info.inlined_sources)).module, __name__) + compiled_fn.save_compiled_function(self.path()) + with open(self.path(), "rb") as f: + compiled_fn = torch.compiler.load_compiled_function(f) + source_info = compiled_fn.source_info() + self.assertIsInstance(source_info, SourceInfo) + self.assertEqual(len(source_info.inlined_sources), 2) + self.assertEqual(next(iter(source_info.inlined_sources)).module, __name__) + def test_aot_compile_graph_break_error_fmt(self): def foo(x, y): a = x + x diff --git a/torch/_dynamo/aot_compile.py b/torch/_dynamo/aot_compile.py index 9f668005bce20..52015df082c73 100644 --- a/torch/_dynamo/aot_compile.py +++ 
b/torch/_dynamo/aot_compile.py @@ -8,7 +8,7 @@ import types from contextlib import AbstractContextManager, ExitStack from dataclasses import dataclass -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, TYPE_CHECKING import torch import torch.fx @@ -19,6 +19,11 @@ from .hooks import Hooks +if TYPE_CHECKING: + from .guards import GuardManagerWrapper + from .package import SourceInfo + + log = logging.getLogger(__name__) @@ -46,13 +51,14 @@ def bind_locals( class CompileArtifacts: signature: inspect.Signature bytecode: types.CodeType - guard_manager: Optional[torch._dynamo.guards.GuardManagerWrapper] + guard_manager: Optional["GuardManagerWrapper"] guards_state: bytes import_sources: dict[str, str] backend_id: str compiled_fn: SerializableCallable original_code: types.CodeType closure: Optional[tuple[Any, ...]] + source_info: "SourceInfo" use_cuda: bool system_info: SystemInfo = dataclasses.field(default_factory=SystemInfo.current) @@ -102,6 +108,9 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any: raise RuntimeError(f"GuardManager check failed, reason: {reason}") return self.fn(*args, **kwargs) + def source_info(self) -> "SourceInfo": + return self._artifacts.source_info + def save_compiled_function(self, path: str) -> None: with open(path, "wb") as f: f.write(type(self).serialize(self)) @@ -187,6 +196,7 @@ def aot_compile_fullgraph( backend: Callable[[torch.fx.GraphModule, list[torch.Tensor]], SerializableCallable], ) -> AOTCompiledFunction: from torch._dynamo.guards import CheckFunctionManager + from torch._dynamo.package import SourceInfo from torch._dynamo.utils import dynamo_timed, get_metrics_context from torch._guards import compile_context, CompileContext, TracingContext @@ -285,6 +295,10 @@ def new_guard_filter_fn( + f"from backend {compiler_fn}) does not implement SerializableCallable." 
) + source_info = SourceInfo(inlined_sources=set()) + for traced_code in output_graph.traced_code: + source_info.add_code(traced_code) + artifacts = CompileArtifacts( signature=signature, bytecode=dynamo_output.bytecode, @@ -295,6 +309,7 @@ def new_guard_filter_fn( compiled_fn=compiled_fn, original_code=fn.__code__, closure=fn.__closure__, + source_info=source_info, use_cuda=use_cuda, ) aot_compiled_fn = AOTCompiledFunction(_artifacts=artifacts) diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index 996431e32ebc9..13b3de0280e24 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -506,6 +506,7 @@ def __init__( ) self.tracing_context: TracingContext = TracingContext(fake_mode) self.tracing_context.traced_code.append(f_code) + self.traced_code = self.tracing_context.traced_code self.dynamo_compile_id: Optional[CompileId] = ( CompileContext.current_compile_id() ) diff --git a/torch/_dynamo/package.py b/torch/_dynamo/package.py index 7df89a39798f0..3de1f4994c3fc 100644 --- a/torch/_dynamo/package.py +++ b/torch/_dynamo/package.py @@ -113,6 +113,35 @@ class InlinedSource: firstlineno: int lastlineno: int checksum: str + content: str + + +@functools.cache +def _get_module_content(module: types.ModuleType) -> str: + return inspect.getsource(module) + + +@dataclasses.dataclass +class SourceInfo: + inlined_sources: set[InlinedSource] + + def add_code(self, code: types.CodeType) -> None: + module = inspect.getmodule(code) + if module is None: + return + sourcelines, firstlineno = inspect.getsourcelines(code) + lastlineno = firstlineno + len(sourcelines) + source = "".join(sourcelines) + assert source == "".join(_get_sourcelines(module, firstlineno, lastlineno)) + self.inlined_sources.add( + InlinedSource( + module=module.__name__, + firstlineno=firstlineno, + lastlineno=lastlineno, + checksum=_hash_source(source), + content=_get_module_content(module), + ) + ) @dataclasses.dataclass @@ -278,7 +307,7 @@ def _find_code_source(obj: Any) -> Optional[str]: @dataclasses.dataclass class _DynamoCacheEntry: codes: list[_DynamoCodeCacheEntry] - inlined_sources: set[InlinedSource] + source_info: SourceInfo use_cuda: bool system_info: SystemInfo = dataclasses.field(default_factory=SystemInfo.current) @@ -383,7 +412,7 @@ def __init__( # For debugging/testing purpose only. 
self._cached_backends: dict[_BackendId, Any] = {} - self._inlined_sources: set[InlinedSource] = set() + self._source_info: SourceInfo = SourceInfo(inlined_sources=set()) self._resume_codes: set[types.CodeType] = set() self._initialized = False if fn is not None: @@ -403,14 +432,14 @@ def initialize( from .eval_frame import innermost_fn assert not self._initialized - self._inlined_sources = set() + self._source_info = SourceInfo(inlined_sources=set()) self._innermost_fn = innermost_fn(fn) # type: ignore[assignment] assert self._innermost_fn is not None if dynamo is not None: assert isinstance(dynamo, _DynamoCacheEntry) dynamo.check_versions() if not ignore_inlined_sources: - for code in dynamo.inlined_sources: + for code in dynamo.source_info.inlined_sources: m = importlib.import_module(code.module) checksum = _hash_sourcelines(m, code.firstlineno, code.lastlineno) if checksum != code.checksum: @@ -418,7 +447,7 @@ def initialize( f"Source code changes detected for {code.module} (line {code.firstlineno} - line {code.lastlineno})" ) - self._inlined_sources = dynamo.inlined_sources + self._source_info = dynamo.source_info main, *codes = dynamo.codes self._codes = {self._innermost_fn.__code__: main} @@ -522,21 +551,7 @@ def add_inlined_source(self, sources: list[types.CodeType]) -> None: for code in sources: if code in self._resume_codes: continue - module = inspect.getmodule(code) - if module is None: - continue - sourcelines, firstlineno = inspect.getsourcelines(code) - lastlineno = firstlineno + len(sourcelines) - source = "".join(sourcelines) - assert source == "".join(_get_sourcelines(module, firstlineno, lastlineno)) - self._inlined_sources.add( - InlinedSource( - module=module.__name__, - firstlineno=firstlineno, - lastlineno=lastlineno, - checksum=_hash_source(source), - ) - ) + self._source_info.add_code(code) def update_use_cuda(self, graph: Optional[torch.fx.Graph]) -> None: self._use_cuda = _graph_uses_non_cpu(graph) @@ -678,7 +693,7 @@ def cache_entry(self) -> _DynamoCacheEntry: self.validate() return _DynamoCacheEntry( codes=list(self._codes.values()), - inlined_sources=self._inlined_sources, + source_info=self._source_info, use_cuda=self._use_cuda, ) From 5937861eba645ca6a3f15a070fb75984adf5e068 Mon Sep 17 00:00:00 2001 From: Aidyn-A Date: Tue, 16 Sep 2025 18:28:47 +0000 Subject: [PATCH 312/693] [TEST][CUDA] Use proper dtype in test_cuda_tensor_pow_scalar_tensor_cuda (#163070) The test `test_binary_ufuncs.py::TestBinaryUfuncsCUDA::test_cuda_tensor_pow_scalar_tensor_cuda` fails with a mismatched `dtype`: ```Python AssertionError: The values for attribute 'dtype' do not match: torch.float32 != torch.float64. ``` This PR forces both arguments to use the same `dtype` to fix the test failure. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/163070 Approved by: https://github.com/eqy --- test/test_binary_ufuncs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_binary_ufuncs.py b/test/test_binary_ufuncs.py index 1c31d5445f915..fbbcd831397af 100644 --- a/test/test_binary_ufuncs.py +++ b/test/test_binary_ufuncs.py @@ -1480,8 +1480,8 @@ def to_np(value): self.assertRaisesRegex(RuntimeError, regex, base.pow_, exponent) elif torch.can_cast(torch.result_type(base, exponent), base.dtype): actual2 = actual.pow_(exponent) - self.assertEqual(actual, expected) - self.assertEqual(actual2, expected) + self.assertEqual(actual, expected.to(actual)) + self.assertEqual(actual2, expected.to(actual2)) else: self.assertRaisesRegex( RuntimeError, From cb7f45fd34b890fa7665837573ebb25744889568 Mon Sep 17 00:00:00 2001 From: Gael Le Lan Date: Tue, 16 Sep 2025 18:57:52 +0000 Subject: [PATCH 313/693] remove unnecessary sync point in AveragedModel update (#158017) Summary: The test `bool(self.n_averaged == 0)` is a CPU/GPU synchronization point that is called for each update. This test is only meant to know whether the AveragedModel copy has been initialized or not. This diff introduces a CPU-based variable for that purpose. When loading from checkpoint we also make sure the parameter is refreshed. After this fix, each `update_parameter` call is reduced to 6ms from 333ms (98% reduction). Test Plan: contbuild & OSS CI Test plan from GitHub: CI Rollback Plan: Differential Revision: D78074709 Pull Request resolved: https://github.com/pytorch/pytorch/pull/158017 Approved by: https://github.com/janeyx99 --- test/optim/test_swa_utils.py | 39 ++++++++++++++++++++++++++- torch/optim/swa_utils.py | 52 +++++++++++++++++++++++++++--------- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/test/optim/test_swa_utils.py b/test/optim/test_swa_utils.py index ae9ff2cf01b69..8f6ce3e6f7a8c 100644 --- a/test/optim/test_swa_utils.py +++ b/test/optim/test_swa_utils.py @@ -76,7 +76,6 @@ def _test_averaged_model(self, net_device, swa_device, ema): # Check that AveragedModel is on the correct device self.assertTrue(p_swa.device == swa_device) self.assertTrue(p_avg.device == net_device) - self.assertTrue(averaged_dnn.n_averaged.device == swa_device) def _run_averaged_steps(self, dnn, swa_device, ema): ema_decay = 0.999 @@ -150,6 +149,44 @@ def test_averaged_model_state_dict(self): self.assertEqual(p_swa, p_swa2) self.assertTrue(averaged_dnn.n_averaged == averaged_dnn2.n_averaged) + def test_averaged_model_backward_compatibility(self): + """Test that AveragedModel correctly handles old checkpoints with tensor n_averaged.""" + dnn = torch.nn.Sequential( + torch.nn.Conv2d(1, 5, kernel_size=3), torch.nn.Linear(5, 10) + ) + averaged_dnn = AveragedModel(dnn) + + # Update parameters a few times + n_updates = 5 + for _ in range(n_updates): + for p in dnn.parameters(): + p.detach().add_(torch.randn_like(p)) + averaged_dnn.update_parameters(dnn) + + # Manually create a state dict with tensor n_averaged (simulating old checkpoint) + state_dict = averaged_dnn.state_dict() + # Create an old-style tensor n_averaged + old_n_averaged = torch.tensor(n_updates, dtype=torch.long) + state_dict["n_averaged"] = old_n_averaged + + # Create new model and load the old-style state dict + averaged_dnn2 = AveragedModel(dnn) + averaged_dnn2.load_state_dict(state_dict) + + # Check that n_averaged was correctly loaded as a Python int + self.assertEqual(averaged_dnn2.n_averaged, n_updates) + 
self.assertIsInstance(averaged_dnn2.n_averaged, int) + + # Verify that parameters are correctly loaded + for p_swa, p_swa2 in zip(averaged_dnn.parameters(), averaged_dnn2.parameters()): + self.assertEqual(p_swa, p_swa2) + + # Test that we can continue to update parameters without issues + for p in dnn.parameters(): + p.detach().add_(torch.randn_like(p)) + averaged_dnn2.update_parameters(dnn) + self.assertEqual(averaged_dnn2.n_averaged, n_updates + 1) + def test_averaged_model_default_avg_fn_picklable(self): dnn = torch.nn.Sequential( torch.nn.Conv2d(1, 5, kernel_size=3), diff --git a/torch/optim/swa_utils.py b/torch/optim/swa_utils.py index da4f005820c68..610a44f0d0d11 100644 --- a/torch/optim/swa_utils.py +++ b/torch/optim/swa_utils.py @@ -116,6 +116,28 @@ def swa_update( return swa_update +def _load_state_dict_pre_hook( + module, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, +): + """Pre-hook to handle backward compatibility with tensor n_averaged.""" + # Check if the old tensor n_averaged is present in the state dict + n_averaged_key = prefix + "n_averaged" + if n_averaged_key in state_dict: + # Convert tensor n_averaged to Python int for backward compatibility + n_averaged_tensor = state_dict[n_averaged_key] + if isinstance(n_averaged_tensor, Tensor): + module.n_averaged = int(n_averaged_tensor.item()) + # Remove the old tensor buffer from state_dict to avoid loading it + del state_dict[n_averaged_key] + + class AveragedModel(Module): r"""Implements averaged model for Stochastic Weight Averaging (SWA) and Exponential Moving Average (EMA). @@ -215,7 +237,7 @@ class AveragedModel(Module): https://paperswithcode.com/method/polyak-averaging """ - n_averaged: Tensor + n_averaged: int def __init__( self, @@ -234,17 +256,25 @@ def __init__( self.module = deepcopy(model) if device is not None: self.module = self.module.to(device) - self.register_buffer( - "n_averaged", torch.tensor(0, dtype=torch.long, device=device) - ) + self.n_averaged = 0 self.avg_fn = avg_fn self.multi_avg_fn = multi_avg_fn self.use_buffers = use_buffers + self.register_load_state_dict_pre_hook(_load_state_dict_pre_hook) def forward(self, *args, **kwargs): """Forward pass.""" return self.module(*args, **kwargs) + def get_extra_state(self) -> Any: + """Get extra state for serialization.""" + return {"n_averaged": self.n_averaged} + + def set_extra_state(self, state: Any) -> None: + """Set extra state from deserialization.""" + if isinstance(state, dict) and "n_averaged" in state: + self.n_averaged = state["n_averaged"] + def update_parameters(self, model: Module): """Update model parameters.""" self_param = ( @@ -280,28 +310,26 @@ def update_parameters(self, model: Module): self.multi_avg_fn( self_params, # type: ignore[arg-type] model_params, # type: ignore[arg-type] - self.n_averaged.to(device), + self.n_averaged, ) elif ( device is not None and device.type in _get_foreach_kernels_supported_devices() ): multi_avg_fn = get_swa_multi_avg_fn() - multi_avg_fn( - self_params, model_params, self.n_averaged.to(device) - ) + multi_avg_fn(self_params, model_params, self.n_averaged) else: avg_fn = get_swa_avg_fn() - n_averaged = self.n_averaged.to(device) for p_averaged, p_model in zip(self_params, model_params): # type: ignore[assignment] - p_averaged.copy_(avg_fn(p_averaged, p_model, n_averaged)) + p_averaged.copy_( + avg_fn(p_averaged, p_model, self.n_averaged) + ) else: for p_averaged, p_model in zip( # type: ignore[assignment] self_param_detached, model_param_detached ): 
- n_averaged = self.n_averaged.to(p_averaged.device) p_averaged.detach().copy_( - self.avg_fn(p_averaged.detach(), p_model, n_averaged) + self.avg_fn(p_averaged.detach(), p_model, self.n_averaged) ) if not self.use_buffers: From a4e74f416bc584d29e7204d23d3d1dd4b56b8ad3 Mon Sep 17 00:00:00 2001 From: Tugsbayasgalan Manlaibaatar Date: Mon, 15 Sep 2025 13:58:46 -0700 Subject: [PATCH 314/693] Fix error message (#162487) More proper fix here should be that we directly replace shape_env with correct sources but it is bit involved as we have to manually construct dynamo sources by hand (need to handle list/dict etc) but it is quite easy if we are operating on a string so i do this as post-processing step for now. Differential Revision: [D82478647](https://our.internmc.facebook.com/intern/diff/D82478647) Pull Request resolved: https://github.com/pytorch/pytorch/pull/162487 Approved by: https://github.com/zhxchen17 --- test/export/test_export.py | 1 - torch/_dynamo/functional_export.py | 29 ++++++++++++++++++--- torch/export/_unlift.py | 42 +++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/test/export/test_export.py b/test/export/test_export.py index 3609f4fd52413..4fb3efeaf564b 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -11825,7 +11825,6 @@ def forward(self, x, y): self.assertEqual(ep.module()(3, 5), 8) self.assertEqual(ep.module()(5, 4), 9) - @testing.expectedFailureStrictV2 # ValueError: Found conflicts between user-specified and inferred ranges def test_dynamic_shapes_bounds(self): class M(torch.nn.Module): """ diff --git a/torch/_dynamo/functional_export.py b/torch/_dynamo/functional_export.py index ba89627453849..2dcee4e53767f 100644 --- a/torch/_dynamo/functional_export.py +++ b/torch/_dynamo/functional_export.py @@ -27,6 +27,27 @@ log = logging.getLogger(__name__) +def post_process_error_msg( + constraint_violation_error: ConstraintViolationError, + mod: Callable[..., Any], + args: Any, + kwargs: Any, +): + """ + Because we trace a different callable, the sources are all messed up. + Manually patch them so the error message looks correct. + """ + from torch.export._unlift import _get_input_paths, _replace_sources + + assert isinstance(mod, torch.nn.Module) + orig_sig = inspect.signature(mod.forward) + flat_input_paths = _get_input_paths((args, kwargs), orig_sig) + constraint_violation_error.args = ( + _replace_sources(constraint_violation_error.args[0], flat_input_paths), + ) + return constraint_violation_error + + def clean_nn_module_stack(graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule: for node in graph_module.graph.nodes: if "nn_module_stack" in node.meta: @@ -398,10 +419,7 @@ def inner(*args: Any, **kwargs: Any) -> torch.fx.GraphModule: module_to_trace.forward.__code__ ).guard_manager check_fn.check(f_locals) - except ( - ConstraintViolationError, - torch.utils._sympy.value_ranges.ValueRangeError, - ) as e: + except ConstraintViolationError as e: constraint_violation_error = e if ( @@ -443,6 +461,9 @@ def inner(*args: Any, **kwargs: Any) -> torch.fx.GraphModule: 'Set TORCH_LOGS="+export" for more information.' 
) if constraint_violation_error: + constraint_violation_error = post_process_error_msg( + constraint_violation_error, mod, args, kwargs + ) raise constraint_violation_error return transformed_graph diff --git a/torch/export/_unlift.py b/torch/export/_unlift.py index 59c5ade5824a6..0cf9b80ff100d 100644 --- a/torch/export/_unlift.py +++ b/torch/export/_unlift.py @@ -586,11 +586,28 @@ def _get_input_paths(example_inputs, signature): """ args, kwargs = example_inputs - ctx = signature.bind(*args, **kwargs).arguments + binded = signature.bind(*args, **kwargs) + binded.apply_defaults() + ctx = binded.arguments flat_example_inputs_with_paths = pytree.tree_leaves_with_path(ctx) return [path for path, _ in flat_example_inputs_with_paths] +def _replace_sources(result_str: str, flat_input_paths: list[Any]): + """ + Given user specified input paths, maybe fix up the guard string + to reflect user path instead of tracer path. + """ + name_mapping = {} + for idx, path in enumerate(flat_input_paths): + name_mapping[f"L['flat_args'][{idx}]"] = f"L{pytree.keystr(path)}" + + replace = result_str + for key, val in name_mapping.items(): + replace = replace.replace(key, val) + return replace + + def _get_input_guards_for_graph( placeholders: list[torch.fx.Node], range_constraints: dict[sympy.Symbol, ValueRanges], @@ -802,14 +819,31 @@ def _unlift_exported_program_lifted_states( graph = unlift_gm.graph placeholders = graph.find_nodes(op="placeholder") if check_guards and placeholders and ep.example_inputs: - gm_sig = inspect.signature(unlift_gm.forward) - input_paths = _get_input_paths(ep.example_inputs, gm_sig) + sig = inspect.signature(unlift_gm.forward) + input_paths = _get_input_paths( + ep.example_inputs, + sig, + ) + + # TODO (tmanlaibaatar) + # This is band-aid solution to export new tracer replacing + # shape env sources to flat_args. The real fix should be replacing + # shape env sources to original user sources but this is quite + # involved because you need to carefully construct new sources using + # dynamo and replace all instances of it inside shape env. But it is + # lot easier to manipulate after we turn them into strings and only + # time we use these guards is during retracing or running exported program, + # so it is probably ok to have "not useful" guards on ep for now. 
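# A minimal sketch (not part of the patch) of what `_replace_sources` does: guard
# strings produced against the flattened tracer inputs ("L['flat_args'][i]") are
# rewritten to use the user-facing argument paths. The two inputs `x` and `y` and
# the guard string below are assumptions chosen purely for illustration.
import torch.utils._pytree as pytree

bound_arguments = {"x": 1, "y": 2}   # what signature.bind(*args, **kwargs).arguments yields
flat_input_paths = [path for path, _ in pytree.tree_leaves_with_path(bound_arguments)]

guard = "L['flat_args'][0].size()[0] == L['flat_args'][1].size()[0]"
for idx, path in enumerate(flat_input_paths):
    guard = guard.replace(f"L['flat_args'][{idx}]", f"L{pytree.keystr(path)}")
print(guard)  # -> L['x'].size()[0] == L['y'].size()[0]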
+ ep_guards = [] + for guard in ep._guards_code: + ep_guards.append(_replace_sources(guard, input_paths)) + guards_code = _get_input_guards_for_graph( placeholders, ep.range_constraints, input_paths ) ep_guards_code = _force_ep_signature_match(ep._guards_code, input_paths) - ep_guards_code = _force_gm_signature_match(ep_guards_code, gm_sig) + ep_guards_code = _force_gm_signature_match(ep_guards_code, sig) guards_code.extend(ep_guards_code) unlift_gm._guards_fn = _convert_guards_code_to_fn(guards_code, input_paths) From 69a5a5ac0248e3f19d8bfdf302da643e93fe0b0c Mon Sep 17 00:00:00 2001 From: Shangdi Yu Date: Tue, 16 Sep 2025 19:09:02 +0000 Subject: [PATCH 315/693] Add to inductor provenance tracking doc (#162975) As title Pull Request resolved: https://github.com/pytorch/pytorch/pull/162975 Approved by: https://github.com/desertfire, https://github.com/mlazos --- .../img/inductor_provenance/index_2.png | Bin 0 -> 576095 bytes .../inductor_provenance/kernel_source_1.png | Bin 0 -> 287510 bytes .../inductor_provenance/kernel_source_2.png | Bin 0 -> 356569 bytes .../inductor_provenance/kernel_source_3.png | Bin 0 -> 764944 bytes .../torch.compiler_inductor_provenance.rst | 26 +++++++++++++----- 5 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 docs/source/_static/img/inductor_provenance/index_2.png create mode 100644 docs/source/_static/img/inductor_provenance/kernel_source_1.png create mode 100644 docs/source/_static/img/inductor_provenance/kernel_source_2.png create mode 100644 docs/source/_static/img/inductor_provenance/kernel_source_3.png diff --git a/docs/source/_static/img/inductor_provenance/index_2.png b/docs/source/_static/img/inductor_provenance/index_2.png new file mode 100644 index 0000000000000000000000000000000000000000..6e156bffdf0af94a88dd820681d11ee7edba6056 GIT binary patch literal 576095 zcma&O2RK|^*FG*X9wr21k{~)UMDKMFA`v|jy#~>u_Zc-Nf+0%u7%id%(HTT%^n~br zbfWh*+HdlH?^mw>|GJ*%J=fXJo_%&XXP33^b?x$N|g^l`SvKj;yFVQh==?8t*r4IAiDJe?g$HS-Uy|_3n=vQ3Sy-?V@U(5s_HNJ-Xrc8O;tj7iuS=e&a>kMpJhEW!qwFC%qhul^aK8J@ ziim(RCq_KOfLUk3EzI-vFZnS01PRsmMhOy+-}6&UKESxL1dtIJr1d9R-N+muhUin^hQNJOrDcqfBP;>j-C^M&`g8<$s7Z;p5|Td^ zk*I4=d=Vp*q+THtU`glfk669E=c*9!N&*u?X``~A#Tw!eF5KRtLYFL8`NFta^{qa| zg5gIfXF%6Vwu8^~ZoRmWm2n^C^#^{sL;3t`gO8{#tu@QZ307>3?23MTS`3-s-Gyca z*B5s%_bHXS-51+J9R|b8Yqi?Mgr1}d-6IXNVpV?jR)wjsE1szH_QXz45=CcJ+K+n& zSs}X#h|S>VI>n`(gjbo@0{6aYKA3+%z7e(*#~NY@3MYK0UAFl8;{!p{_C=pT#9N6G z{b`L9YT?(>#9{ji@%6MA;C}1reA#;<7uSNrJt{kSHAIhQJy$5xy)XO3v&5_f)BwVJ z&zn&(j(lnyXq+3dYc=kU=IF{;J{Mj#O$cl>5dfL3ctQwHP5Y5Bwar8{?Ap z>q(7_x-k#kvekp%2wcnKv+h$VxthDf9&I;Wn7)!HxI&x==w0+AD2Pwq3D?OdN-tFl zaevdA42+m7OBAHjg>GtVm})#n}s=#K-x(b{)LJ>2UG_l4FhgxEyT9db~we=rp`OaCSm=a%P_ z$Cwvg&`3D&jc~cQD~&IW7i?oiT`J+yNm&M?3R;v%jEIY%Bj~wQI)eM&m98ma{&CDd zyqhM5|9U8pKsf&r$|G(G zq5uGOipea7KN2Y-9W9LTz(MW&`0~dMOG{&(`>O3ig0x~x-0fVY z_rKk+59&YT8hw9cvE#zp*KAO8)41g`jFqq1X6|5b8Ftq+LVUqyxA634J;k+JCWs7p z4B#h>7C^>4Nmk}$(WdmGBnS7kdgr5dQ=UB#LXIj@obu&Tx`(-k^&(MWzDN#a8b%2d zK!wcKEay0j$x`-vaU(ytGN=;ef$(@gtnQ<#%Z7?E>1tWhE)OKtrygw`hc}lC*lA zrf*@(FPGstI}*`*w%Yb0Z;S_E@_D~_*XVKDKA;9fKuiDNl_-N0VVR4n}24Ym$F&3TR6@z~6i#!(F8k&rzp=nEPu_B>~pq-+%p`DRZ zWUyyQ*ND>i)W?`co+fMJXredsV8-D4$oEcX%*XcK$UfEFE3kF+|fGSQf=$Pq9V6Nb&{P#w*)RN=puHq#|XQXSLnOr5aS zSQ~Q-ke%RI7IuMJHJp7)Xv(c&S+ZALcT~4qSG6eRTe`u9!NIqPZyQBBlfN7;9IVHO z$Bir4C(9k(9mOY1(T}&LH_NvQw)iH#9B&_6oU0$FEs`ukJC!>|KGdycZa2GfdVKYU z&2@QvaN+TCaPt$yTKKJ>jx6>^UT-d~#IIJ)tVLCz-ILwR+#?P$_`LWClinoRgKfbG z4xZ#+(D|IghQcn-iVa@z5Top=7u90bn%j)qLMMNnFjA04dqhVvicd&w(j2=T!!;2$ 
zkKS+F?iWGeHYCbZkBj(lqW>8nDa~kA^_Fw9QQmBTH&PcRo;?(;(-dQEOMCz*Lh68Z z*N`N_^>#AiRm^UXbA3}G9NKhb&9J@1I2UrCKo&V69F)KZg5?qv}&V-mBG$$Q$VE&x~}`Zw~y0Fo}kSHI{r2_ zstZ}Eg%10ylS%aUH!j)0`Qn#Rq}Kqy`O60Uc+#KX z$&YB&As2gaf@>nBpi2im??42OcoMwsY+8LKJJD5pmA}6~|DTD~$`jdA& zj%MA+*)YNx6X<9S-Vdt-iH6l%Gih)2QsW4C*W!bAS(_Hut+pQb3n34skc)eot7Ibq z;&3W4!fLGw0AnXuho2B#AQ)lX_I3g_{?d_(0byk zmS4zbwOnkuV4Lbd|8WwgrHu*z9oV_KAGG_49OXf5!df5`{v%Z@{YSY@#1gc}8j=F* z=mr(Ton2{&kwI5Jf; zMN~aW5iFhK-WwyJ4ffZ$2>n~8s%sznt8T0uYTeksxB?5cJ5U!Gr-iPi>@EDwSQ2FO z1a37&s?xZ3$iPM7cZ<;~?Vo*~@u|&o84J!p>I^xg;i{{Gr~^oz28W3T*zt;@{nW#v7n2 zz4e5Xhm)iW&h^81Q`T$uQgQG)Nn-s%T0CzeD|KhZ8<_|1lC8TcjG$f zL<#g*XzR*aAJh?jQG#9&(>p&t+<#d;`|k>mf(4?b^ln z7rt%n$8vg6PlhHUhVX?N?ixjOOl@Xa?Nav-c;eTDhm&!!?~BGt0P{jZk{X4>)k7*L zB+6j)2fn6d<|W5B2C%6WHJ1>rolQR+Ami~$qe{;gYi{X^Fb)n3;PFCztrtC?X}~q? zHLhv6%*Um$dY@Kw#Hkq=E<_f$bS&7G4?hMN*d2=fIr3BsYjJe6a`8t!@O2E_nPH^w z`8!vNY7SrUnN&QgMYQw@6CAG1eIen@mh04h*0C``J; zYbd^liQ|G9Wc;z0UpE0aUgIT6eC)P#-$jN_<0Xs67A-`)6}E;D$l9(Zyj{dtUm%oC zd)IH9`_`89bSPzYB#fY^)xr|2b*DxbHBSx)iQxY2c>)b-j~<5A#ZTzNrn2r`^l9jC zy2I>#`a5{if*}+DzFnZ88257J+pAQUG&?;t+`BLLihWz1mT&J-fzibH9$yHRQSPrc`J$aZ<;u*;CwRBjP+~HqOo&7KL%syATxsoyn$oRwuoR z*t6PS!j*MEYNz4Z7*7|sZ`XkNx}MLqRmb-Q8p#GCvA;d>Gga^7OZQal5`5)$hySbm zh|FUTx-|H5YT;(ZWl2_%9p3ds9w9`>{tR>|*d6ffdc7S&s&SPhgLs@N&zgLyLy^?G z>aZEN$uryK7|5No^?Dg zTNpH{d7JpIQ`33gd;Bc%w-@nvB0eT`a=D)QY(Wh9y2naau!Zszhv?8fYBu6LEJVKl zoPZ*UIfHjb%q9h3Hx1i@_LODBU2g|!4{UtT>l>aZe#}7O1wbwmY_$STzosAR(eGUx zK3YlfKa4bFaCdBXW(5COon_H7LXDfRMQtwC2QQbDx^ph>y*j!wtZpbw`#od#J$nDH zrCU-R^L}CGfR+V=7{2^nkp8i@ZMvHVfl(hnx7%<|1d)VLy8H@5sXu$Z>!GM;093wv`tf9Te>2s) zs+YYNN*WRUrQ+%BftHvmpsI!m7gv2o_f72t8ONxq9_f$3m`MdsK6uBnu8+fGaqury zbRn=~vUv?DEfsq319w3Dd)~yx=b`fN@6P9xN#rgKAhag%>)Qgm-&eA<{q?K%*+(Li zjwMO^@Hgd3$8*v!N`=L2KxiBp+$tFpjd)F&lKmGe-p(%yG1OaoYc>8gqjiR78s4HO z53z=g?@t~G)Gl9r800>J7HL`i`F9(o{*9{YTy$ktbCxEPhL0)C4;U`!HrR|-tMQl^ zqBbNRDvwUd*!%U;hw4hmLj1)WErrkXET=>c3=#;t;bUS&M<@HD5PxV+1hw7&x6GPt zgSX_)$G@+oONH8nRvk~-p2F^^hX=Zd%v3Tq%L;yotO&zp%(3{ggZRSrthZ&h|Du|I z;L6BPQSejOzL-eqr)0SqnWgu&g<0?I>>&?W%JIwCv^_WcjZ(cVpTMtQf2ZVu4LDSq zuBY&(;$@&q5|V2&@u-y%!pIU72tHHmZQJ-UWq&5{AZw_Wv0d+8+oT7r%-q8%u>dKd z%Y$tP+9?OHOJo^cBf!gnh$1||Bk{OESb#liFoxRB+1R*0dhmM%xo>dTu)iyn!3w={ zy}n9C8ea&?pWhvJ^|gICLa*N=oA8`*bDsLey%odL{(PTqdayJ;xo6|xk9$~52y_oC z9%)2%kFnOUmDyuEJCZy;n~$ZLmp2?l4>$wcBS;V0I&DX{o-D8??1yLoXXVoP5WX= zSC4tr$iy6INoQ$Uv!228=T(NBgJIokp$>a`(C#hNUuqlt>8~|N7L|3*5PM(sf&z`nM#9#bPQK5_;_jV&FU#m|WcGAD!9T4!u_qu**A;Y3HY<=!Qm%4+(z^?6CJ4Hv|A@K zduMQOvW}p^s)wfpQs#$JSzL{wx0V*YH~k%+Lw4+z(+jK64)|M7Is`qXx2?Y+W{0bp z_Rintu0QdT#Z*pfWL)VJ9&dYG!2E9xR_mJgY9SLxy74QxbwSYkuUBjO#*f}&`rky> z+pp-|s z_m3KDd-LD234KX3<|$4>1oieX_$ zlMm9Z;{rTC1n=*T#}^g|LY;_|Gmm^-S=piMwJi~qU%v7W=TV?`PIp(I$4J{e&CvoC z<5M)=CZYnR*CV>Anc39+hOJLj>hpvDo-JEX^eh6UJyYn9bW_k&X7u97TqvA&)D16c zzaLqf-O$MHGP+Zva)gYV{99`|53dweKr7VSNdLL}GH5n6sOss;3J-7QOa8U{u5!0? 
zGZcV-TwRp@{&0zN;fXflwb;5Cq0`>f@v|8y|Jo~Orn``xu|1D7W@mQq$WDMFi{A zLbFo4)C}C&jWcK+G|pgLj?UlPE%Q$gPfr+=8D(Art??kE)c#BB_utXYY}7u(AK1Cb zG-&)BgqHCiS$cs7wMQ(z6z>o_wWhTzo-xt?Wu@TwSr~PGzCjXjZ(M{OYj7dO_HxRpaCkT~HV2AHM=kZ+U#(KwY8j zJt1UkXP1fcZ_Ix=^1K06)wi*TZ3uO&Hi`jyl1hyURd!xO8svcc)eKs-c7x&JOvCS6 zD^URD$;hY%<*Z5}-Hiiq{Waxgt_0QEg(Tn2FIB9Fy~*+BNeu`%dwIo1N`2dErpkT4 zYNuYixN4~ZC5mBPWTiS3RmZ0^5AAtU>$lrCYe#FMyTj06l;disav?<&kGuUcGP=rVzd__?-7*dHxp)#E;lX5-e@8wraDhD{Vw zn#IVv4JT~7X`6Dtck=nZSniJZ)pHrM>c|@Zm?2Belo9)6PTdXYB~cpwX)~yl$ru7r z$TnaL`D?q#Zf{3I@@4$o+Q{@6eDAC4lCr7SL02}rt10b;c63*b zYGaFgw`B1wpt`)^fb8#Wc=Z3=FIGz-gnDC=4`xFqdpkT0H{UKKhmddK2Ji%xwXvP; z6&=rzek$W2)FJ&JVbPPXJ_POXJiGnBTN3D=iL9f=UmVI{Eqv90c2$F zz4lydwr9>~E&h^}$+uBbd6`{X;v{=nqiH9{!sNP3Il(ul9IY$KuCH|6 z_wJr9W5=S%iT5)S-C>DS=cYqfb;V`#z2*H0$HN03DX8nt{2RvoO}6oscg4FoBYrC- zeyRLU)_s%ud8v)vTfX^(`=bX2@hjhb58wKI9X(ugoEWNfI2ru37C!Fwdw+j#y3bd4 zI(cZ@eKad8a%TMi7}A)}m6OYnP~GpO{l4K8x;_#KjComaP0cC;7R>Z&B^a;1IJJDNW@1*YovL%weC`) zwN2Hr(HPMeo#Q&0+>g4bd$_XBW_A1}&xp+&?Kj~@+g;i9?wCLMcb@W4q&~Fcv7w_t zpIeU5GpQaOUs-atVg~z&k0*5n;rrhgj0iqAwO?#?;|eP;c2#B? z`e_Q4j5|dHH@fJC>!sNh-FG*;qg8|VJ3c)5DMcq&^;Pl&`tZ(&b7T}CPH;KL?GHEZ z%SW5xF~7e))-BO$G)obcmu5W=mzO8$kc(^8j1`V+uW7#)myA`66n?8u$d55>Be@&UM5eM_a3J?o z+l2ARJ)Ub!poB^vUrNXUJLc{6%;#N)OrBbnN<&W5uSe(3+7nc-O;=*u;BA}p%ABk< zmz(SHOL<4aFQEs?1sh@sHa2wn^MukUfuwS1tg9>b#ML{87)CVxrdPp@h#^lPaO&26uv6L~pcb zt&DLUWt}|tEAyGP^yEHjVPv!aj3Vk5FJ%w5e7z?sgP}C9eeAF?~zkU%_#Bc=En-xYm9UrweyxbYG+DG zq1Z+BcZVFR^7|Y@4I_1q-L1HeLieIoDt@=6PsFQjemwHy^_y>@TJ)s6=!a~8Qv0r& zL)-nVFCQaGi3v=`PGPs&R@RnJ_CVgdM>(BeN=P@G={qnqHtzFJ$K7)Ee^?S+9?E8W>KwakUGg!q(AWowgqQChPM)BN zy$RK^jUp2Xb%*x|YyoQQ?61J4_;?vR%HXA@VkM+|K?-e6-UG@}!-xt!&+*hL@WGh* zw|iy3bEUkBPQSqMZk(R7!*<3c^B}FWr z8PS?o(N3MH3DvisCsPDasa~c654XyBGP~rZmuGJ_cX*WFj4G6z6|O0Q>@NHEm&L8M zm(K;u^yUv`qvxUrPf`;jcBZ42qG#!C!2%!ll@(xs+#eaiUgH-Dm)KDm^a%}7!% zzd^Eh7~i0h>ha5%Lq+Dz#m#=OQ-=zvP8*Y|Gmo)s5AA||SECu{qoFpMhb*`?jFO0K zfovMDN29p@W~kRw&#KhYW}RWWij17Lm4kl!LCqxN_!v2G6tUF&-mXs*!_#^3gGr8V z8%y1Fm5RMl_sl~FPdMM#;sArPqEO80=+i2X$nS=k#y`Jz5ji)H!mUEe40p4?%~q7w z8x7m-XqCNA)zuDSsa}|n@_V6cq#cG1j#%;-)PT-yO22;jI?guB{;?`u=X$i1*+})T zekwZ6%DhNP*2W)F!smOs6%eK0-_SEKQpR)%I6saCwV# z8b9UMy$eUm*J_rEKfZC_OOTrAfF9KU{1mZO-)9*U@pa(v>*#3zRz$d1*{?2i%ypLP z7hg5GpbzXe8aHP8sJ14)X+9Fjn)~>=KIjp|;lT9XBBOc#Ttc8bPosh9?*CyqJwkD!9TviLL%J+m8NwD8%) z*R-u|(5Me*{cJUS(W99#uRX0Y|8u`f=s`S{O9GqR)|3;r@GCf%ptac%|GE3=E`n=L zXSXn=E1c$_pC|GwM_WroHOuYLtyP^&$DYr2qt;pm7T(j);xvW05|nYY=ICc*Llw8* zz+nTJrK~=qV&k}TA<|JOuFO39#K7(@$)4&SPIrUxfj9G_DaS90LN2Dzq#4#fXjVo@ z@o(IIqoaNGURvch-*gR>7~{aQtIoQt&G4_v&yo(Zx~Kx~yzF4PvcqyZ?HHPzuWhk0 zj7b((?B6VG(|ulcVW#xER^(;Bi~jm-bm$JHJF;|hND|xS?qgJ-H!%5w^NmAK(eb#U z+dC4ENa%{rTAL^;vxDcJ1?{|V#Zb=b_tEO?p_Hhn<3!h@oJFyU_4}!Be>%qL5?gMc zpcPCGSBy{I#dZtz;|5C@*fkb41KmKabJ1?WiR^ zPq=2aB3o-=ztMG^^7vWOr1Eg?yEUV-g}kVc7v`f#+4R*k1*zjV36m1wA$WPjr1o$` zx(~;&o%AlcT@t0UAhg%w7_3xfFJ)i;RcSK$sH>?}O2WgB(vP>6Y5)MJ$JIRYhBup` zsgrn$VBGJ9)as2dJgF7SdCcv(%L$}gw)*i2Lu(_vqdP+@Zq0^J7x%R^w4T6Lxfx`) zy*ygCIBctsckA=e&6=Vf#-XP*Z_VU<90#iPJm>81%8@=gWI2m&=doJ-%&qWQR!J!| z)b;IOQ9iV^zk3)vG^+A8AC(84fBV`jT}o<4H;+-)?yhl_Hly)~MDeGnY2)#siLEaV zV#KV>ssZEi-}0pb%;$nFJ;SeUr5jbuhPm}e6+bo7bxT9{+msmo)GcCbv5}h1{q`(e zOzuQ4)yT^Y^=31(?ANd6g^yPTczg#RDbVV9%>2AhdVgO=;J59$GS9{Bg-X2Mx2-Pz zt)Yk7??#a&Pf1T?-rcaDGtzdi*VVrT1OoR?imvetg$QreTf2^XB2Uh^0n#DQ26|_lsF%?b!RjtH1rP_UG=y`SB@R|FRRZ}`> zri1z0X+sXB(?Ksr?Mvnv?`^%KFIJmM5M!5jh8J#9{chz-O+HzUEqqRu@EH8jcTj$A zf52-g>$uLL;GsWKnam}9uKC{E z+xgXfkjt;3Q#z^SDl+RA_|K?C0)cYi3wcOA+PW0$YwU0?>tM`tFJ8ZFK z+0X&`xyfDg=JdxuXY_F*?z3<47sSILO41r@kES29yOgY07X9q|Y_X=^(Y-eZ 
z1`GBP$K^BqqsJzBS;u!7=3O;M<3x$el1$2NQfcNZUd4G)`-p<}^L65WbE-uwD96=Y z1q+s%w@3?pO~Q_a-L#|eJ|k&Erg5Lqj@@%Ey7K#pOSeoyO}dF7P&PaoKgW11_S{ml;}`W{ol}qi+Nhx9!m!kYZRlB0-9a-dcu)yR z%T9uE-CI-XJ)6gf>KczlQ9tU|eO=@(tN|b3J#h+&tWGPPuP3>Pd)ws??g|)bk7oQ!td1*^(Yup<#72%6ivKaD)8V~8q5sUzqFZ23 zn(S7Xn>INy{3NSAnmol&GpEvq9pXwU6i=@-%B1Z*%u#>;rqHLM(!tPi)sXDc@F3{X z2~TVAN#5|F<;3P);5~=Na3MSR>C7iYH~p@b{*M< zp6Sl<#>h`zT3Y`0^XsDhn#kytbo&V9tlOs5|Me8$U>}QuPOD~_jG=VP(UF?zk~{bu z1LjRC!;FG6aK05UzQ0ckQOchnc0a?_??5c_r0a{;rV*L`J9!@!L`3(~4E>4poJj;P zMs@t{)Fqp$G4H=m=6}qq{@Yqnz{s54HC{ft=3pzJewgqdD@@JA1=N3HuQjqYDRGS^ z?^?d=g?_m#w5;)Ag`R}c)(snv{~EzxD^iT?q65iZWk>bD=H)+^&HuB4PX9EKhdgj% zdC>Rq+uv66-{kt=z9>&dig=09_j?N=MgQlZ{`DP!PnjN2Tpl~$|KeBnf64G)UnB2_ zct15bX^Hgvf0y<@J^FNP=P#c+Wu3W|Fr;i{!gU;C({3atx}JOsbVyV$WCKYu~xi-s`!01P%gSx^C#m0 zDk#UxQ+Kc3?P*mnI!W$vuK8Go(4ZP+JJZT6PACRC`@3C8Y$dAtuuyZ=`(NwJ-#*Qs zzi{iy7?|oV{yk5LqC7{$O761bGd;qkV`^sMj2=zBrrV^1dBfc+dvz-P_9edkP-~lT z@9}b1mJ6D`xBWbQ$%}pjzO95qE@kWPZ}>_t)Q9RX2{U} zK0+nDMfUbvrlyx`*3j9fZ}AuCug~puj?m0WmkR%$Z%@d4+8+PZDEw{N?3hhv%9cU= zQAxi?x+ZaSY*%YPN$8s!v&ihzlvjl+k5#ixyCx)=sJMKc3emq@`ah!%GBh=4mv{F( zylq)W{_Gsy`;v`nV~`VKuPIM(m%8One7t^rOz1s2*#5Q@((Y#Lld+8ZsIbP0O{dyJLyBdr+s#c0YVz4P7sd4w+3(vY~u<}u~3$E(!h zv5;i7bw{F_&80)A%m+wSIutMMwUJkV1aUBsXSdA0f!AguV;|-mRiTN;(Ps1FUO&q{ zO4h0EHlDnU1Y@*>43K_#zDueYcb}A5t5~o0c8e=mTWKuVs+)HDW~hyo~G8 zVfEo}brH*smSeEJJMdbWHhrJw)ufRTl}FIs9h8cHS|`c;uO7z?ROzN4C6c|1wPYTTC#V zC~vNQhrI|<+WA&Yfa+YyxSKw4JM|=)9Zi^KJqUXIk|W@=$_tigQ;>_M^d^#F9LK8! zk|OQ&x$HauT^Qzr&ZweGQ38waBysFWPSS7p8g&9jFzCmA-U%`97oVf2I>YQjSIXOV z#~RWe?;F%{h^7zR&=I_QhWv{od7G1OB?;P2i@HaSShcWb4JR-uguu* z+-YN04M+S&L~I79hp~TL}yDokKwHK#!&<``&r~PC^`gZf7P1 zRV{J#erDWDH_8YP5W$Ly_pOI;y-VyCtNL|6$vr&SZRcm)cs`r&L$Ta#<=p^DRGzly zMtHSa`u!pqq?}X58Zr|TTw1s6GB<#0BB9wQo z0y`j0Ghog{#>c``ULYsJSngC% zjwO0%yIm+@C}$)nDg#wxlyJx$M%+Ctq!~&^dP4?+e-PwOw&iSyJHIZR#i`Oh4l|9~!%B1rn)Q7kq7ksoV3PYDUT8ikd9{H0#YwfkZ2@pw*Iw4J-+}Abs zb&Y&ByG`!c_1rKhBHwW2S{`o{xT=~&c z40I!qba2`4(%HzJdf<3-D*bize#N&3@)<1fbFPppIQc{1<5$W8aO>j*$K&fME?p8f zy%~27#vM8<>;)2J!DrQ-G2Eh9u{Q89_ztYz?xC zJ{+C2#7FSm3I<%4EC4!L^}Ko>S$Mx|X*%lucgEfH3LW%=sFh*n+q8mcIqa$EWkypY4_9OIJ~G`zJr zRCDa=pei#Fr7SZsAM{@F)>ZgfGv;+|7K!H;46L|Pcxc4s0^0^0#HJ%r$mVdZ#LjEUHTIsE88^@|Twy?qcPA$#GB zHOB|3CuRD0nzsbZdkkI1v`EiOq#Yh2=ef^jP8*HA>}Sn5`-y)joPM^B;^Z zm$I!jcR09QH>6hkWFwa-{lr6dFe0+2v((jNge-wndCWztE-Wb%YsKt@uM?-<_}F33 zT%GTc^HrJ5m=;*q44~Vsvsx84T7}$c^(G82K+f)rxpa48E}Px3_4hQh3uEAG<06?0 z5Ho4rOk0@IQ{-IscOYZn=$gavby12Eb^bEW!$3K~PezqlV14{8mbD`Y@15z(BoWzF zB#RyXgvS^-?Q9G@dKb^v_N6<-qvL{rhMlunst9e{QbO6BB$2APM|L&ld2LL$Qno(P zOyd{AhwEBuSy(qBSt31k<1c*Ue~#4rX-?;H(}c~L{4Cb}ZNW8m#TKc_gD&1uHoz0u ztXJ)K(M`2`$q{>^w!Fu?q}D5W6IlUId>S`(LadR3FcXf88}DwhA0d)AOiltBjaulA zJwHdaAa}-qn&fLJq3?VT8RV>URIEJ|LI?f^W&^i@Nw@}cs+yXU!(CJl^$mh%gK)BN&h1!s%iM@v|Jq}8T$=c1Z`&yg}g^#srZ-B z9rKQlFO~EYIXr49oXHt*Y#{jX2O7*Z%9+dN-p2=vE;1wEXyE2pp?fp6ni>lbec%wD zai4Ig|G5I(k|>3Cu^7@xYNM9CPxf#%=J0X8iGpRYA<-YEe!NLO z2r{T#jJjP{x)HZ&5+w>OHZ-v>^%F4ulXltZef>$|AnSY+!L#P-3Yn}^paKZf<7ao_ z4qI1Z=4gAU zyN`)ohw={8HWE~l2gGuEc1lhGVi1Rqe!~kcXUrQObTL7xuir}q_q`orA*slW6c#U~ z#znhQ5%!DVgbbK7bXT%?9!jB@=Afxm?S|1zH+ z755(kIY?Kkaw;TV#MAgMuQ~d8PoTj)XCP+^LW8&W$K8!jL2?&EECW^|?Gvr|)JtB; z(`c$jgj#`Wlq==ML&-Vd7>XcQ?h3LwqOleOt!XcvZVmKf|u2*z47 z%_x@1f(7z}IRa&fDlLjw#TR(Lqj2e_cBysnHztu8YFUnBj?3^dj@KM^9BCZ!KLpVA z{@e-+{>1$EjBxKVN)EBrs=C26E~xBoU}VZeG=q>^TR0&2MhuAJz}MyImi!Yfe%>$hblTDE^RH! 
zop`c~_~K(lC^RROK$=GuY=sM8dA*fnLfsvfg%qLZz8iQ-u44VCv?lbkzwbJw)7s)h zC~gf>lklPR`ok7%&V})MJa+ij)@0$5zEAhnxssg}86=r`MD@(G1qRt%JUWD$Z&9eg zrA`RVatlK2uKg&zDdjl}!~D*hw*Ci!^$=CoXy1$PE;?lM|1oo{Jx>O7<5+A9rcja~ z(?J{<4;TD7MvKir$_?oT)fbA?vu9<}SRiuJ!;5SVnW<>6$V7!#@|UOZ$LoG4hrIV5 zXkh8cu9!8R23YNo2{0YF(j$ry&5;i#Va022r(IkJ8m|?QHz=r(fe)Py=eXpO&WZUb zBr3GL?{Xb7Oknp~`oQ~ygY!X93&t3S!7+fYn?jxa5f6_PTI$%%ngsj~+K+xqVxbg_ zX~7$GFlrdcIcU?IzJMHmX&SR8HjNjJ$C<|5{1EOYsB&-mB~2a7Y-o4LIJG&-599** z-mDXjdw|m&4*b>sMN34ibjwI;;T(#_82kyG4?Y4naw9IT**kjcqZipREMzSDxL&Rq z=J1nE=!wwgu`GpHgB-SV@@PTqry~*F?L85*NWMw^PHfCcX)mGWmd25?yxF@DWP>I) zmr0BSdkM3HrNJ*c3o}p%3^oSP$tE7EE-7L&)^>jNZ8J>(w^`g(WOu3cX}q9L!TZ~^ z^G%cA#B|eDj)yU2vlgVfRG(GBT=&XZHqR76+J&$f$!uRZ=O**XV_mLPpXahb35LL{ zSV#V0bE{oo(ptS0=VZS2yzY`e)kHEZALa#jC}e0sY|+8QDN1;9OX0qYQj@LGaX9nS zsp_MYBj8zcF66y+tdrS~cvs&+>})SZ01Eq5Z4 zcV+O3@77OM>PBN(O2OTKX%fVvS`s`x}aSk1h zwi@07ehM@O7lD<*l8^K$MYm|CaAej!v8K@%X=p$iWoaL}Z*9PPCWutH^$W{Zr zW(Ud?WMa1o(#P@*(xtH$L*Pl!!Sm(~3@fkpJB6%3hDa%R9sZSQhgLEHiea&h`;IvY zhs0-FHR1On!f8;^Wq}a*Wjixl%|yXPkoOL`)dJb1L)m;hE9AT_TpFSaY}WbKy%biC zCo}e}fu*sK@)`2mJ;$7zpMHqUxg&8fmakzFDu0ntqrsunz;!@dLh{ub>|o4iCmq^3 zY5~7%w$CMZaA$&+VQ@#C6S0lu2)FJaDlKtB{UB>g39d@|NZCX^$F?K-*Jh-Eqgz^(W0sv zLT*i@s&!w1Fkw)92V_yk^@Gush*v^_*O6>T zfmeo#4x7ZPqZeQ8Dk9m;uFRO@P`H?EMB4sFjjS%LRLF9$%^iq4dAWrg76Nud&xGX% z=HJ<2fkSPR=!K$|1hA4mjbcs8bL_So%UO9{Wb7-!&h=C%aS91|)3DB^9v%uY4|=}l ztFZEjZI{$T1EWz0GZTijax-avwLk^z#w-`@j7to^Fd4FH5m6g4`xe;O+lV|{b(jPG zEjLNY=jqCW1!`KPgm7v$-{2M7$jt&B2&;s8bWUYg%7N0KAi?|y`lW|XVeX|6QzZ)8 z8FIQw$T<(|DDIbu*q7KP01iqF#35y{U?iv^@HA|q!P9&|012rl_!*f0Gt2seSyQM#QsnjgVNA%?sTzZkNrR9i`y3HvsM1A~;{Wnc_omi(;S zkAkLx9t1lDZRlQf2xr+0b_3l4c@!vf*Ik=|`yw3RpG{dTL3y}h&=ced>>}(1M>eR+EW%3`sQW0Easc?7>o9Zd$?FkJW2Wv5r9PGgEzbv0cpx_S2k zDstaK=z4=H1zqv9#i1Ngnc9>^5&mQN{OcC}w0>)rI_QnvYC@yH0=LXUpgjUIHg$LB z5{A1HoRHrhLQs}U%N>?|bzo+s7J8lQQ2Y&aCG5SjymtTRz6()Q>gAHm9D z4-4iI(T{P-1ZLSjKZAOZu}3h=8o0mZa3feQ`5ce97OgRba>IQrABB2@AQ5STv_|ws zdWM$4BhZRYZ$}Sk#V8&esZP!K`WDS+$jXA5aV>!_JMLhaRsyYWA^F9HbV2Fa-D;b0 z-bZGu*>X%UJK+S7L%xFD?2*xxvZmIvj1Bp8+C9Dmd@Iui=`@kswhES<`>T0&hChvI z6b&!k`UxjSTi4%7(KiyNZIGqV@v&e#8m%GeAM=7!LL7k&rbw;QJ&dFvxgG6yWwp@1 z2MGVN*m&#OkB3f5P9Y#M_EiLdgBr$~F9X9|Ldk%%LCtI009>wlW?m*cuc#`5rztoC^OyBZWsH6ZaPETx_^J!>)=#b7B&r z2}z-iLvcZ@&|Af*mwzyQ1xl3hqWc*+M4Ez|%v=7)eMvl7s$(nw-I&r;bCrstNSrG@l=u7XPa2oov82; zOX3qKRk>vRxepLLjd4Mv=WyQCbiOoq1$;Bjj;_{N=xft0s(@;m?1k$K< zrxJ>C-Z&O3eu)%Rx;$IUMUe#_!@bXwLB#J^j+s%L9A>CvOi zc$-3Xq#=X`$@5#!`{PuQ({4```c{~Y;HaxVoYOm1JfLIaO^q5i%_ZEj9e>G zpS6(BDa0vI3%Iqr5QcQQCi)?v>e9TZf&bo?@!!-|5(E@9fml8pj-gVZ=%GfD<0Gtj zibLn*)f8%BZ$fieejcAf#no>vIN;=vu7H53m9Z>vN;7$o5snqv1N@^e7c|ur9|h9K zX@Q=CypY#`&oImw3Nu!H{!7ftsYnKhYvW)TYj{@NR6NfkIYm2p3Wb-o{EC;;G?}M^ z^3NfV4a59G@Ku~>sG4X4_wudLhp#^)hzi^aNCj{JgpAAV9w>wmWK8I;DF3X=$+`p`)#*}uk*d&Mq*wcF&S!rWH;bN~4V2`V#@{e)b38`&t z9dj{@=~gs!niau1h2}Sh5E?^F+6|^1vnA{hNG&?&*Sp=4^2zD!lq(4m=4*)lx_8}N zsRA0Za+Q?Po+iheVM0QS1pJe^anGGr=A#+e-mxUPKSDv5tna@6GF5ER`Wh?m-}3TR zV1B(+8rW*MNubyQDX%~=^+6WmP%`#h9i6Coi)QAmBFG-n0Jg|)_5P&jWIkFI3`L9Y zD_y3;&?>TFnCocF*w;Jj=l}(##PSvUaMpz!hrpFUrlT?xOe0#6r7r=rs70!F+ONcgT!6x4^9ui`$EyP zjOOrV+UbD(cd%0DDMo!LJOd^Six>KhJ8Fyj3ax}jfh3EA<_ryxD9AzZgAhgqq>4>=m`8>xdOS6#G!75oy9Oc40~)h=#bIi&(t?ki z`uBsZ?~ZGtw_+p;X-#Q!2^92$b%-eG*Xfgw)qyyBC|{{VF`G(=aXQL<6G9RLprFb% zZp`$6u5pU5vb3UwV#~FQA`2Iw;&<^82PP@t56zPPG%MjTfUe-wCZ^BR6*nVq1>AtM z_4ws;%m9-c6Cis6WZQFUcNZ=S(V`Np@2yh#FZzB>g0 zCmG4v_ce0WVf;rzasM-13`A*D-zh{dje*U@A+J2dyW>M}^mc|VqOU;KI7yHLj{dZk zj5EVQKt>?dex)7wRa!hqA(HP>L2CdSu-xxC-oOAihvP*u7K1F_DmQRZFs1b!?_|o& zFG)=3Uja(GoH${-1R#Vmqf_i?mfb60z0joda_4@-rD%5?B_YDT;RdnEuL0C_=fx$n 
z3lL;wCe<|AG-o6L%=v=@ov>gvZ+I&h$g7n4;cq}SOad56GDbKJHADm4j}tUsY3lo` zhA7@QuaGNVX^`zPD4MmfD+;FCgLrO4NWjdf6fU3K2;iGxz}!Hg?BedsF`|5jRq_>! z8jRaAbrm^g5c4?s{*WA&kSTKqh?>$%i?M7uy2$hbjkKDq-13`YP9SGOozxG-_wYB*9j4P$NAbR zTvR8TYTybjstPn(nxPb$OwA)r*DE3U2R=eI+05{H6L}fh5s$z1s{B)-@-NGu?_aBe zVKF-gXQ11Fl!5WMCrYJe6|%)_NP;|4fz^~n3Y!_s?!_Hu53CDqoEV4#$&QK&CLqmV z3;DY+x(&qDFmB$oL@=+%tEe-;{C%$7!{HP`sszQ@%b2UaQ{-f4iXq;B9qz=fD)hgn zqlH(2abPiUVSZtER5Fzj(NP>V}2VKUY zK+ZUw4P+@!O%N-LY;yEn(U;hw-Hdimf&H#!G6h2@3#jn4uyYPbL}Lg?08JD2BtL)( zLqqM!$iDaUE2QTV>Sh{H*~{-d0wSjMLq%!D_X?cm{HCENm2c^4Hln|uMKrJwKSGP2 zAWIbup1zybY`7Y&vva|`0lL7NYE9>jMGQhL?|-cN+g<>^h+-l4@GJP66E>70Krr3_ z9r?jvY@?cSwT{_0pJC%{bavcS5YxQ|1R3w>EdZ=p!)>%bF&!h+hCB}afoo~Smme;O z)g+}Kf~m_?1ud#et=I~RO*V>4s`#ja`ae*cUcuAU=xxU$sNs9hw`fH*7A_luS(vHP zxb=~6wdaNWA&;=vqH)d~J}!l>^G_2D;}P450t;_NuqsXACTjgGJ;AtePzmf^pp(aZ zobEzFFXb`#Y-@6UB!T*Y+z6KL(|_~)|KLsh?Nj8HoX9Ea&ennGenBL_FkG4<_w-qx zWDwsRj0x1HUdg@T zTrQ-`<4hDN;7nb;FZ&O~501!&0MCcmG8~5@htiS)_5;7!1yOJ{*e+kmg4q`~{DW4A z)}KxvX24O&F$VdlECHv-ob^S(VQ82IDtHM&!G>jNpjw5$;&AApo1I2t-~0I}zET=9 z31`hn?yolj5`$M~c^deM^^_FgRro_lT+o?1g&LeXE&wOB5yVB}5H4th&SI!UO~Pvf zNMJUI6l8AW9G)VYHWi|_6h*!XhCrrc*RGptuv5YrwgUw=R6H5VA!a~b2!IzE(9eZcC|ZWoz_c9~(&*E;-N3&zvXJap zt<_p87?TmmJv+f9{J{lp@#nq9tl0^rRx~0LV5{kRVv~*?OY0NUaSo3H2JYF~^R5s9 zh@PJMMgY|cXb+^-aa*?5e;0p8E_vD*6xu;PR?yDu_A5pNfY|^j$3fh*t8|$%8CPyW z+K`yrZnX0EG#m4Yd`t=Q+A#BtcbjT$-HS)E6&@e37LaakgSVC678;Jzt^cuCXcX?x zI?X_?Y7M&@MDI_I@(xF*zQEa^fw0cJr>%q3T~uU&JOpv! zSc}5AVZ{3D`#65qqI4RRWDnGgVoi~kr+pLBgMHpWjlK38!Uj$SyX7~ELmq*1!R(4Y zwT>f5z?{$GHe=!ek4S~m9DbT553j;7!s&fE_+igrk9q`9njk>~1hzH?%`E1Nh@F!2 z6|#H`^O}}JJ)Ra=+n}31zvmQ!yG9HJAsf_=0?vxo%kC5I0%{q~{3$#N4KZBQf%xOX z00I$59R(fH#Gy4E*zB3bXEVVW&e=mLijWj01hE}u&>@wH0q3?cKN^O?Wq^6PYq&sP zJj{9Rd6d@%hrsHLKC6%)p~K4`fxfgG6k*bK8*^n3F0k4oCR-a5b1;jAu3YhhaBlj? z9L!|UaP4hGdoEyayFg|YAJ2#HMc1XoNX%qIg}T6dbq*k(4J%n@5%Xppq-1R)^lQ2sP!<2!FA?Xgpn|I$moWi2rx&D-dpwT&pB<>otR0oq?MX)phLX z=h)6(qj8lTO?{tvjwn$i}b+T(vJsB=ItVsHw}_sKNB8 z#F$>ApR&1|{?=+E=2;cQo~}QeC_egOH79Z;=uC7mRWRkwJgR2B%LpJ1nu@g5p{q2q zwmt(K@@2HbmvL%`+FcL2tl;h^ZGdi%M6_is>@~R^G{^V~< zOI1VENF9RAi!LbekHpzzZK@a>#+BTpIy6Q6zEfeVHMslakH(holJ(8f%^xxv@14d6 zl-%3WDl2K6`=(NDpH;nSw}uy-ft=~r zpuS@#7iQE7C`awQ+ppwu6l&QE?J)*%neuGauNUT(cCoo0Kf3d*GKo% zGElH2|dnKndcL>P3lwx*>m0VPL7h-{Tc9;v1C_IjnXM8!Ujw4QVx+SMFU zIkZ-7EB37{33Cq}f8Nd~QD;Axt#>ePwwUUNmIV45`%c@__JAsWQw6?G$0o0c`C#{! 
zba?rg3)}AU{!?^p+>Ieaw%xsczQf33pz%Akg)2|5qR_WLXCGB;s~zmqb^7vGNkwTLXJm$+LF@Yk~)xQ=i~ zxy!0-T7PQXt}VVdbsIBUOr8F77y?y%`S|-Y?Z4WClOpjt5@$&&y&9na8EiFxzD}2 zZ96yz?0f|@ipm@lW`6;Vm3Me_vRm{azYE*>Py0DfTj5Syg*5DVS}Vq8miX{A-mo)4 z_I-He+56Hsb@WMt2nZelGf6H{C zMg!!bD#7Nb7Wx8-eb|3T;i)|Xe?KMq^18b2AD!%Ja7Bay6rN)-E-%i1`?@ay3J=Zu z)?V#DP2j1&@IO|S$LE}MhOpoHXQ};TLPyO3ycoH4;S23QS)bEu-k$)hs1-k#G}S-E zg#U4Ka@_wfF&T%HQUpEpJu zT><91?i$~p2}Qu|ty2MxCMWkeq;UZ`FtBzQz?l0PQ~wX+Ytak>HUmKFVuYM=W1M;@TX|~6jA%n zcM<&>V)7keBfXgP-T$b{?#UTu4sycT0bC{ZnCti3;LLEw8J1lMNbB7Xyy5ZlAS;2& z4Mi{Fugt%iqm+vPx{b^!ormS;2m4{g4E<^!&s;gHuH>MNpd8cspuhSq*FKmb88C9s&hXB%lc&Uw|a2{I%oGGqqA=`(T=>AHMR?l$Oi2PyUC)WQm3b5AA z|5D;x_HQy>eTaAY?!a;#&f}OgAG83I#xP;&eGt;|bA&tBJ>ljBE=`lvYh<+rJuCpf zzo5qr(egKdJ^Mi*n*xND^2>va-@crGI#09yPvsd;2~%2BOYP zkOj7hYTEV8$D{MsdyfdPgackX>zwp0tSrws&fQ%PxV@fOa3IfP{G<(?Ap1cjx%G#0 z7~F&x;ac5pZP8dQkwQ2xa(_9zVa|`qoBt0CH;vCC!&`GqUKLt_5}lgDioutePY)!> zxh*H?qaMF!k!0Ja>%eb^%vD=lQd)ht2FUJBX*B(6izy~2zJe;((`|~Fvy0WpJb;p~ z3qE-m1xF~E==RTCRU|b3jTC{cdsshM?;3zDeD?nE2Q9h7*{s<{341uyiiddp9|||0 z)F;ome08yvX!Tfisu}-Pr5|x4wc$j z&Ex;+PPWj!&Bv%Cgb|iLxW67GyAH>Lym^2rjyF_GGMLh?mv(D2D{|<7Q*b@r_7;2QAlQAtWqg@$XFR)X6?mjk-Fz znY!JCM7cg$es%!m>?p}b*8D@wxG9Y0Cl1A<4UIkq#qj}%uJxT}U)P~qeb;pC8kG_Bg&PM^*8UWePBJ&FBR`)=5cTqGxVzdJ z2V9UT4;`cAQ>p`hlhA%A3|_F5A;6YS{Z}difQ1uQA5?Z&z>k0tf5zXMAz-@8lmmAL zz>w1_&)#}t7eSGg9A#cBd_K?Fd^u>NWR`3z{jwi&e|tXc`@EJAnXe_<@5#8@qT#R0 z1gojk0=$LT0S_9AvAw#bR3(wiwKuhjKxU1N&kGNp>i_sAc}N(I;8?S0 zm42xDIw%kLJzW|A+&^*~)hyA83E=5s_RqIn?aq04a?)(vSIO1%X(V11QPYM5ASP1~ z()_nANBu1Pc96rB-?OB1LE%s6q_ZCk4&l_IB6tB!_InSaLRA-1gKx%X1cowzVK+bm z3|M@>&tH-YF|M0|L1-7LV%}0!<(SW*O zf%y4fUvi1C%_;!i0HjscGH^jk>&gd~OD;UPZXcXy+Pf>17HYX4woX}JABN`h_*Xs* z*n}_FqAnNgu)hGZ5Nd2suyUVfd{b+9$!qqOixr0{Z8OuKgzxqNQ|!PBWhMo3My#oG zQAuzw`ul|7*^J5fOoa)_y!XLBA#yFD&wE-}J1#Bj5Wt*>=zHCsZsjIf%M%Fw6-yL! zw(bs=<+fYvgR+WhQHi6qp7MHZtHIJ2~@pUP=unrLfj^Phyvw0?(3=0DUJ zvs$GLAL4)1eHU8!PFMN;J7sNBiCs zMIb|9g6}YZ7&Yv=lDae)CL!6lU05_m={)~4f`2`pq9BkkC^ClP(d^+uy_Y3&zK3n?NS;F}SpnRn*AJd=efvS~OMRO-Jv%v1FY=!@7ya*(_*O+o z9qzExWPu+6+a=Dx)uI7F_NpJwz)jBslpP#07gB2!6nw(}wKabzvRtK&RPd~6t)*}? 
z3A53(ZbgoEEM~r6&C|j3Jui`4|5LF5b|=|72?Ak1QUcZr=G$gR|2D46p>%R|gh<>zv zC3;>B)CanfcE7oc#k)eP^*xR0Bw6<*pbyDv0zj>B9Q&(>+RJ@K*f!ce_Eys1qQY{r zkRm|()!aiWRb5=RurH-dv&Rtl$j{X_`i^$p!gHlLPZ*nVyv$A2I`+{0#PCs9UXX({AO!*h4A(rjioy zJZ=FP(Ypd4$&essI0&VKI2rksQ1jCf*#P#{dc~?MeWWoS`ON2{{@dmIqLuRBbr${?|YwwYm$QWG5igjZBP5OJs#fz)&jppye*y8I^5T62c&)xxcV zH@PHvYyIaR`c1%(nf$Nti*T}-tl>>sj7C=*$LrIh{rxAy-!0uPk#P^tl-&fy9c`^J zfTUZ=_CsuFMw; zj{xy`-@$j?TClbNGFoj6uv?9LUhl;Lw#E@ax(k>0n%f!{_=k=~42>KhK_Nz?dm7lE z)z=cwOaE7Bb-zf0x70>mUWJ;KzW$4to}l|W z8F@$eZp!esu@*UgS+E90#ys!a@HS6>&{KcpL~eXK3_ z58p#JofY^d5MxXIf~Ph48l*GZ=OwJC26ErQpTc(j(v2PLDr-;@p?`<}7C?@=o3cP_egKe!V>wqUJuNdBP z7(7j`S;n*bw?Ip#tO61vR(vYEOqi*d4QEcef*DtH^CRzUL)e42zhB~{0h-4M9ls3* zv=?UIO@0PKpIKlf?b$6n7k13@T&t-hMg9jti*6)9TuqtS3=_?O(Jo)`5KMG_)O#up zkNx(8($k-O)Xa0nfW(g4MYTsAElGj~BS}`@WYFRf@Rry=w>Z{pj?(pHe`=^WdolIb zOifzj+*xhzW8KrS{@NVxXO6Nvm$uZv0|Cr~*dyKl{Q$^^RT;?P8sMyx!2!QRI}MuC zEjAJS<&9Bmf#z|$KzxR7XM^`0|N1vIyQJY>Hns_EHY9@a8JaE&`9e(HYLVl=f3sYJ z05a_WIuU|Vi|r%bCA%Skz$`0ONCxVEWfn~hU0%)0WUDE=Qe|Q%>cq1-6Z15g_!6CF zk&@!8v{obiL|!k5PQ_;rotl?98J%imBA*+pH})mQ7TrLKok0~+bI;zJGWf-#>_A*< zx}UKcq&;K%l%!cw@RG3zqTsw}Av9U8V}sBf#&;O?Q*1iMPj7fCA8%;U1V z-Wvl6K9*bU!Iv|Ut_m>j5#QPye(f}^v60BVGycnTr|2NS(PC@Nk)JHCB@lmsZFu}b zKPa2B`|;MOv--slw(IzP_c@qgHxMOI*=o8}aUhm;V52{VrBD8WTQlj2D5;$T;$xLd zWslbf6&`9U$3BTqp2Ts#zjiHpspr9N>PDgk2qee>MMCd=tRsB?I+kMKwT@0xf2=xUNfz;7QQGtytThJ)rS# zmGgs${?>j1We+Int1IRHPNPg|acJXSY0TD?U{P=I_WRQjW>>H+$zJWnB&Y(veeb49 zCdawOQC&kbJVRyfTj|0i`rfzA55#j)3$jeZjsny_h7+4}UDI0xnk%6Y9hkYkj{a$L zuALR_Pwq*VsRDQ&-jYyZ$d_3*nR??`I(XUGw;`%Cd*hABzZpMekmfT9+O>E~Od*yF z;dSF5;C)u^g*A_8yi7_ z6IT6M_J`{hVYmy=@zdntgNc&Oj8+=0qYypyp%EwB-&+YA)42%*B}#R-)~2qPF^(1; zcM{{vGLMz>QusI&Q~8QRs<+DxwuI^xn};_WQ@C#E^mV0X#+Ljh&pVRyrq=j%bWdw` z*d&h5Nsna=ueDZ|P_H#MvW-ZXu(68$ww+G5`Uk>z?Z(rMN2Cnm$wT-GWDSzxS zy{y`00TWC0Iq#LrQ>;Z|tO(YDwG4XhCFFv^x&V0>Vc-Vz&10eLE%8!ZnPVWKzisY{ zA}h0M%EB>OQ$VvC`KI*Aq4XMjzwsb7pnke}-_Zh~&moeXv?Ibm^pC|#SMX(aSi`87 zzBh}<$-jSI(rtkE^^|qV(?*zTY}wp}^0RLCttSoqyWip4c&x7E=^HzjtHaZQ@s{hy za#2NCQ;`}S(^(p?214X_b?PlWuG-wB^9b$-NynFC+w zIugBnbSsroDRk{4I{=2i(Rm!Sk*P$v+3#u-utYQzA=X^MvPf z)f3s~Q=fGV<>ro>g_Uq5M1pz490kZlWEL1BxQ-Ok%7I)w_OVUBXg^oeTWPNp)B2Aa z1asx)LPMU|8i|bi^jm1$#=hz{GR>xXG)>u2%;{o{a*y9T3DXSChx!wH+ z3apniA@UAhu#j)fS7rlay9`s=9z_RvOd4J?6dw!jC&jEw*7t)MrZwRVxm-ESjfiB` ztzPCi*4?g)tfk(KRIA!QC1@RXqH7i5j7zMx;I6`cwjTAl!t+kypDvP$FB_i3C~z%~ z;F|vp7U_?^_p&>>|A0{i6vYBEr3(P;Ykza=BwzdyA#%0ZM6x!PqcF}rtrv9%gePe7 z1Z4w*G}aW??E(v6fHd&L=7A`CAa56Sjyw+B3?GCf_R-`&ArBEtiTw6o7Y*!yT$=~> z`Om2Ja;UHFkGKDJ60VyH)EKd5i)F5Fq-r%Im>OzcRw0j9-@ha9EterN(ol=*?l}$tEMx_-j7c;neTf_KWPnMexYu(FyW>*FWMiL(T7RcE#F%7aZT@|o~!*}hQD ztKiCHo)^k9Ay?U>W>tq=MTWHt(L3Spp`Jxg*KMm;NFkh9%t2L?rn9*}7hs#!^L@UJ z;oHtrc-Mfa_f2V6HvB*@#?yVU@?U6J*p$4Frz9 z4ID_2S+?O!8jZMHO>}kiA~T&e*T|@fYB&JSUw1abWYFhtq_f~rM339PL5wH#cO{qE z{#^Cavc&jA$T;Q(I1MBW{q}M@4BZFxJ~(UNM8hSaw&!%IpPmWwIOzLi(I zGR#K}7Z-Hbir_g3s-xZeZws2`S>i*#P+g4A(tP_NZJ-xzf{euer2E#*eLu0f*!6xP z0}G@;%<#Bm#-YipvUtg&h0Hlfqc%#f>Z{_jnTJ3se`xph)C0T11&kP)q+Y?Lhf)N2 zZvms{hlh~5XUj7pM*t)$0u;NmD?oXU0lY%=zXHq}n$MdL6gQIXl^z=T@dt)ds%f^$ zEbj&9*ELoqWr3}1u%F%H*0cN82rhT7e|s>AHr;@3z+)I@ct%Ko-RQ;$>{9XM?|r|n zE@Zypb5ImIwXsL$by$SlGP|!6YR(JUcDa|z8mBJYdu`JKk#b#6V6Myw`Fg2xN_cJJ zB}k7?Z9qJ43I3*+OdYFbA*8}a{ekzW@B@{+z2UmYwV=YdR%^qn=B?eITMML-@@`rL z`8@{$)vhyo5S#kU5TXMA+6t4xlC|%deafwZR|D2YMdQ@&y`E3c1VfLG5A$UVCPH^$ zHWwOPVNLl8I_pxCi?WAw5sF)MiSMMr)tsrRXC!w(e~+8inU=3s8QD}i1|&XOOq6iw z%=SYB$X8@8of@7x#w|NEzS_1r@7nW{C4sSPXuds7!=COE|zZrA|J0_|$v+u=E z6C4oEgUC{M0tZtoGuf3u;3k!3-u)prtsKF9otXWb&O)x*tDE};h8}fQJ-Vv+ZtBNE 
z_hStwac-J<)Q0tMSo3RB-KT@3b9htvIrh{%oOktb%yNVg(dWZjXgddmC6a0GPLR9O zgzDiDhu&!8%M=Gn1BaVk2c{*ACC0qsR2lWZEO=AMQ?A$UK;z+;1PuhYztE_i4?? zh{__^*x7#aD@PWyf|x3QJ2BJ@F+w@--gcgDKBKZ1!->dCoT~Mt7fJ7)6LuSgwbO@& z`TL9UXC~VN|G@q(wQj#;N_D*C`&)zn)UmTbZMj6CZn|)z#jUaYNB=@@lMOESW#iIq z1hn9o0vy$;s8VXbF|^+#F5d!>W~Hg~^LkiE6Vhb{%_B@ambVbmr6uPwsc#@$3`}3N z_de-jDm09QPqF{+PcK}bVz=$>j2;SVg)T*pR~-#9c4^q!R=|tL=~KitOuu}ALA7vQ zNO<<|D%6UF)rG4QN`iP2osk-IzBxIy-a^*S3yH=A^FK|iydl&>Otd}9IeQGw<0-sm zME&plF2n12#<0V4|C)ybSz|aLu6iDfw)tnQ_tC3D1vDSvOh=_mq`EK4Sg^~cu(5)9 z+Bs+Vh7qm-dgJRSa>+D3w&Ts0b)Vgk0WZltAcYIx%o^YrHluI)$jo*GIx3j3h{{Wz z_sG&lW@a*<7mwJJX`y#07luFXsN2eTx2+<$?N`dQmdMqXfk^4j63IOhNjb+goFz=p z6X}b2c^)O+!9SSK5ah||9Nq60lw#A#?cj5dZPfeZ)y5C8%(nL~0d_(OMY^;%YZ0pF znEd2f+T--q3i*NoyHQm#NbXcLJ%Q~|@hScuO`jypQcm93=MG|K-DQW^m4(gBTu}yN zQvstUB3wan9I2b-IWcKk7S18I(W6?d5nLU2^|jA~#((5{dn#$j;OK0Uoxa~uI5iJT zG_=ULllQ90xcrGOPY^S1){twAp8jS(_XAa5aopzehQ_ok-k=1B$BY>EWP-iIZd(R( zT?>Zv{<6VEgSfn9gd=sYsz$#R@?=bJM+6Ytj*0K++$v^>3Y76P|Mt-7(3xONrgC>W z%D(Dt%JAzh;LdP#c-p2Wd8<*3ub+O!$!W5qVI`4h*_1PpaVg zlfepfUQ1z^$lg4CTGYGDbDJZ6!d$0`QX=B-NTGZ&=v4A6-fTN-6`zb=hfci3a-m?D z)ZQF)I+AgQAx84m>0KLy+L#VWcs*;P&9lDEr3GR`PW8lz)th;0nZupcXZ&@?!6)1W zP8tX5SPSkcIGa0eSxwP0y|=Z&Dlut8Z)$|yvDTfGD@zxmN^FLo?x1{_@L({;y=WO} z&T^BmUp>t?QjE>YnaY?`iPudT7fM~D(yht;qq*9o+uSlA0B<5e@DRxufv(Ictrl;b zWx;>U#O1aR@{MKz`W!Np%s;f{1n4*u({+`UBJ3OcE^}3W0kex2b7_@8c-6EMFePhf ztt;r7o>R)GgAITlg|9utzubcTdoQcpI&P#t;30zKRA2wH|GFkImmR% z>eDa$)E}QqW_xYk*ITcB-5%u+z;{Reg(2u(OvH!wINQthS@Oubn;=iWV2yHRtXRV{ zzSZi!(n0Y10(Y?qpH31s(ikJLfv>@BUCi)`Bozm-#C88)8l1mMv}lD4urj1G;F}~N--#hG_=ZItdfH`AFZuqi0RoAoqfv$w#c!vt3AC2i z_mGb=oe0Zi;-2p0IaQe$A7SKTAVCxbmCd+*vW7Y+Yiot0LY738-mLZJqkbYq_h{e9 zLdEA(=$#Y9gZnWj@MM&Wo&|%Faf|hKzuiEyrY-I!7ul{Vxkc^;(j@M6GpbNB+wupj zq?RxjCd}T`oxb~7ldY2J3rRHekt~@7L3$S^>1I(2ZrduF+Yrs>IcHqb?XU<))I4nN zN2sFxQ{ksn8q-MI6mb&VcPgE3faoqhozE%Js|#LF`SwKuzYR)@Dw7Tn$1U( zg5Rc^FiFm`OQWNa9%G~--=IytpVksRt`pa77n0gvwtn{>y}QgUmw!i^O>{v)CQ@_G zVqJ`|oc!F;I_Q#gXET8ilBrntNd4X_K5uu6t+s+acY5sk7P@nq z489oUk_1FP+Zvx6{pU2P`%~l%CYhgIUGrbdpRD@Y>uu&XS0@Pl1Rk(Tp>Ez48?m9D zL`2X1wSEh_uLlkBUDtaKBt#9wa|=ZR!^Yyi z_S|Ct6H@(5lXz{Mymaq@WTZjs)?243`Q0GaLK#pkFbG}*lw4aVI*m?yyFhrDk76QM zuoI?1sJ?k$jn#C+mD9l@1mOPj?fd(r@~8(S@246;fnNXlS$@ERQWzR4Oy4%RRUhA+ zSc#y1E?RdT7`<3WsTh|(?U&!EUe42dmynRaX7eCdnD0j9`0^GO-;aE?yftIL_)|YO z@^ueMrPbu=_?JSUz(W`#6C?C1yUJ$oM;;3kwt@<}vE9Uw{ z(Pz7;I5PHFV{=TitFBp9Z zbNiLupm#e{X^}4#IkswDAO&PVl(i}+En0gxqmPFWG4%)~R}t*P21&)^z+uC_*+Vfh zrzn7#S6{NW#i`&vM`w0uFBq$`3E{R7|HPln0JrVwjj6)AsEWK%Q`Du`(e-Y7-!{N@ z)X9y`S2K+H)IrwC7=vKdb2fo=$Lqy@EU|ZmNe@fO3-4?Q-Znk8nhno7CkF<{XR8DE z9XbJ4yO&73R1~HuG~r0pAzUQ{3OLbx5KfP#gN>A13Yje~Gg1?cQ@k>`{a7prEhg8Y z$Y4j)7OXp-(X`MGtKfzzipl+D#DvBv8oHtD2Bz1bz@551Ld@!$F!Z@{)z2=-Q9n0i zXtnw531ONrKe;^wBgyECqumXpYTn zFNWmBalB+|nYtC`pN~}Ramsj*v$uIkBMLi=n05Lb>9VK$7@t}1n*M}rnsADZO~Pt( z7owNFa~cg#ZtZ-|y~3%YrzxnvSUcOYgTI!py&{Ec1aDhc6WfoYAHb_&hI058$WtgP>e#s z(c;P-K4yhuN;=41b9oRu*Y2S)<;bVvrSfMv_ke?66iIROtHJq*QnjOb1{*){7j^0D zM4Yq!CrsvS8@U}noKPDIX!+L-UNaz?E=@K6xY}SCwdk}#%nMJy;N#&6GO$_E@$C*=16Inp|Mnx&H!9bJ(FzSJP;rnwv z$3VLOZ#Dmaln{u17NCHLnZD2+_2pT-$(XjQVM?9^G)-HcOiS5v#H2^^&+y1s&!@#ASw%Uj!zgEviGBJuj*q_}eaR@m6wsbXb> z*6ZKmm}oT-2l>?|Z7?T~86o<}@o?gCNPidC#+Rrni_s&Fq}Jj;0%$lG18YqJ-~GNG zkJ-ViVIAGd?eCnO8@RDRy^va*=M^B{fy~GO@bb0k)WP-_awm_%8naacQ2G#`l0a2L zGY)ozoks$sZLrE%t=>KfAHBHueV*b;c<2CRuZe^FE%Wcv)L$UzXX0kHkU{-VpE*U? 
zDummTJCjh&+G=V&qV`@XrAc`EIyD{iUq@7r`wT+)f7Vv?l|o}v%(XJMqgxBT2OUvU zW)S|^Mu4j4mDI#juyV$*t@Wn4ZRviYQ9 ztsz!UuMcHJ8+hvb4mILuph*z}C{ML>41BPTd;PbD$jXlox2gmJ=E5$B(GHvwM%ith+s7v~7jl*=J14dt>r{Je7(OQ?UfiXJv5Z5a3U1>d8( z3ZBS{Gj0%n)zcMKP%nWftMPoy)Qe9}B;}fK^=Yhmept+AeuV$=_|%)s^+<*L(HVDp zg$@Yi>4{MAan*`{{XqcJc{Y4Rwc>A>AnLT2d;JeW|39=ea-il^v2t)}Ow`WvbYUBG zlt$ZM3*VFj@u^d`^|C6;bKjZX=oE}H-E~;y9lMrP^gCX3*&NrO_ZPFQQa{>lNH}Gm zHmJUFZNa zEf<$o+#E%wQ$!xA|9;}k3R*UvH@jl9!ei86lMk2=LoJzct~S8)h0Ip|PIW($BZiXm zfa3=icq}Iv<8&3hIFFBxc7Kx^I^Dk^+5lo4b+jbniq@<1!U>*ODI^fC$+x$;Ga9Z=rGeWWd!UR|Z}S zDUk|&)`ENPPCGKlyOTCtO7n=L`ll;@P#IiQP=JWK6!_CnUa-aB{>E{-Or*ei8p%1X zK0ax_RKq`fGHWIFv$a+2i`KwwAe|H~3DW;kvJIkC0nvU0)itp2>&~8m9u+c8q>;_k z#wr?jz%VZM6P-KQ2_^aVTV7VYia zTC9~-`!3P-Zs*4V)a+_H2PC(5RIq=a-tYA*vttGDDXD4geyH6J7m5t2his>Qw6{e5 zJ7W+sp9m(1)5Q2lD7qU}S{Y^1$*7Nco~IG@)iqQi-0b)!!i}>8?`;Ks(nf#t>%AOX zbu)HP2PJhSTEd!y`xpBi=c3FNDd2VLM-8X#37qR!4g=N{1CzBuXFRCi*S@i2E3YzS z&+4ClsUHbKi&of(o_pENVl?LQB?O6r!(j_CtxQ-#k=<9h!_M4}9>@^BLH!s#BPsccSSsWbuwO3bea> z%?U!Xs>P3%cz0+Y#H=)-6)&VMQ^=HhqO9)}r%i<&v!Sfeh!fQ2+&FpfGgQeZA6LfL zWV(?1Nfc{Sz*u$Ft!>82i2uk9l(rR*S~zV)1JQ6Wm%%23BJXOO7PZGTCFPP=OW`$K z)?X6wOzR10nsGTIfy{9jzh>nccH=vxQ1KK^UqGJ)uQcxW!boqYgBC zt}+3>Z^ss^IV+Bcv!kiSC_n2i;7h!I@3QD%Q>2enBgcj&;fg(l>Ps{JmP(78BRz-~ z_&cXTn9pnc)E1LjX7;Ba4Ef2~YEz5(t;^%R2#P4n&QHAkZfEm`xL~H+!j9X?z=M#{ zKZb6ltgE*}tT8<2enjUB9Lu#1S4Ko-LNMZFovzRyjbf{0=?i^E52e|v#H?% z$^1K#^nXKB8}fmb0Yr=`^#Zm0&+$kLRDNB(zAM#loL^7rkYRqTk;DU;&rRn+BN>wy zQvx9KSBuS3%T%-5`31d6acX(+`Ji&EcO*{=(%Q{Kx&j{7x-<2mj-P!dlfv2#23f+u z?tJ1zZ5;z-zT4Gj&6L$5{Y`ZLe7z9hK~A$XS6L94&D}-lX=K)0Una)3UmAf$KEap)tvcL`lUZ=I*ZNrQ24Yc#^wdnrNaij z@4(s)rT&`Z^SF?`VP#Tn_Ia_aQspXC|Jd0=KqvdYmL;m?Ns+8oBChso;M^sndVi%b=v=7M5e5k28qnJMuNwqA@Zk z%2!d}GUD~f{~CBvm%=lR#vaE}`Z_Ik?rmGo0v|Zh)}ZIZi92#j+$uo9;$#qoyiMLl z+~zCMmC`a#9J-X*Agk>%-EYn{YjW|;MrHw;>1o`#)N9RP)8)0Dh)@f(q2?r%XkWDL zJVaMOSdT>XsdwaXTr^Q)>3qwYEw{k!&FGsJ(zT%uRt$f)s{}Or5xibbvWr({KaK{O zx{!GXaWjUg%#|48w4mq8d>_1%#bueBsD1WUE_;~u%{xse*w5GW|449pM|c5&!^U}0bzH4dYFMI^0?j7XCMM?Rg0@)d{d}1xcUz~_Kuz&B8 zi!aS?!5)Ov)ALo(wz3joaW>{XSatU*9W*N8|Aemai_@X|50SWv=4`rWfq!IIfqbci z{(U#rZ?@vRp;L4*;m7$JvO^MU7P`Gntzk;OEvxm@fqIJh5>vnnC2P#0mAPk#@~P1q zL79-Zd}6hYzXih|Onn^$m!FDBPVw-maz_1iby$2_ndm6wAuzrCm#mAs!J2{qm8ne5 zLq~eJi0%Ce2}&QsT~*r@;=?ZQBLol6q_t5qc8Jt~&C6GNCvmunrSI!G-7-1}4hAcW zv_%EnwS|j3JhV2iZAez#*S&Bdg{b)tnK`X&V^hnxAtYfmu7HXuR-AfPB33U-paQla?Yjc3{m>`VdUaJuulIzTve2J8*$ zDs9{sSVukNJ8?b*h+Q6&?#Ed3Qzg0^BKLzfo}H0&`?mZGzn&7rNr;AB0;_Wn+v5Z# z6R}6_eOu2ctd%nVD*rd2u~GLynQu|8zqbvS@=5BD0{|xWhDXr0oU3MiFiCJG*NoB* z#v~fXbE_^r?1x2AE8=lgj_Ud~>d?JS(Ixn)r!p-%GE-&fB`Og_)3Q~C%_o*mROg3R z{|3E1B|wvU9%~Hu+d3xxwakNtBzG%Dw^`@U>wyHI2F7RAPf7`mEQA~`c6O}jT@8|E z`j;G{-(frCACUR=Gxn%}GM3j?J{n{PqOenr?kr$!T*%z|n-SLjY!VlmciU?$^rYJ6 z9PLV$9-MtU75I12kA%nheZt`D;StKUqK2Sw`yf@dy8^o%nZ6& zNt+RlcCtFuf&m12GUVmir-tu_wT zdKFua|8&`b^s~zH(l3+iZZ`Ju4gUdMc{Ui5Q(@2P>UOh?{L@SRmT2W2eZ3D}ZneGA z=sUT$e4PzF3vcWyDMYm)Hp|jiJBE1rB6aQlz>1sIve&K_klu_ul|VNXqm&t=1A_Ez z{N7Fc_;Kk*U6#jJpHU`>I>rWx)KNcL=Po9d4woCEu*JB2AU{gXqudldF8S({$bO_5>-oC%w zsIDcsvn6$D;#t;|@4TD{8OD_DK@F^ac6()SRg+qi$3a#{uX{@(dFB4@iX8*?r1Zd2 zY-qI=Y_yQCYqcTa4ZjyoQQjP|+yV8zphNJmEtI$|_)!lU3EgXbDZn3;UVTLt+)ej9 zszV%iCI!Qj;?Z*^nRL4`96N`0WbETFqo)eCz0KEl6?pGl#yz%pP|Bw=6f0S~XD?1{ z@QU9n`7T5pL*613GHx;A9N#?3Zo1KYlAb>{$(`)(bUvSGE7vy2>h3Ul_yXlTnBi!T z@Z3YRnlhl47-~_l1c#;#CMj+Xl5VlL?fvxXt27SNTEay8I%bP=o~P%9YjqW6$X%XM z-1k8&4#obpYp-0*<6cMR*8UQ#0P8=WKe)Kxy~v;=vHqB@V{|2RZmP|mtj*j=NO&PL z6`nq>xcQE#&Wu1T#X+;#@Z$y^`!j(&OWeOsG#PSUC~afsXI0)X4U}@}XiEP9r3`Y7 z*1+PORE&J)6DxvsXU2$Tx%t%vZ0(|&Oo^R>Qf?CAGVa$iHAIA0U0zX~AZLp|%a5Q@ 
zoh8qJ4}02IEI~`*bC9k{HH;oZ3 zjS^&C2BSj>tpOs`h2mVIfV~*l-RbIv@WBEDaVTt&Nk;Jz`JIYlABK(qBjlgb);rfL zCwh!*3TwdB8-Z>O?D=tkDgn(1Z#xhFyaOXcGKD<=F}qbLsdn{%IdHl7F6F?crs&NL z@qcgJW^pkPPP0Uwyv*ONLAWrWR||nc;Hs9Dd zi$^Y@2CjCF>y7F(eX3|l`0U4yCDu;W3S1gnT*j#3z>3l*dI~1?HMF$j)l8z;u&i+B z_PEW{mXyt^p`DDYn)*g7YW`TLDfo9r4Z4;p2ZHH^teRY9fxjhB^6V!nvEK4r|LXBv zhZEOH)$8x6)BNq?KEB*x#;n#j$ZVn$m6>X0Dm$vJ(F(|$pid`P4cl5NE%&bB2_vYn z0c&q{+Op7Cl=;&%SWaFh4fH?Bfx7#Rn!icSk_MbfjMOULWd?*f<&)oKGeoraxv&(O z8K`c=i&d1%xVk^jKb!~aiNwt7CK9|)Or~Fpr1Q_RLz=1>cud;TuG+SR^VF5c#v(lR zP-GMD-xr`uM&1exxUP5;$)@&gC@Zm-e?;Z&hH@_xV1996Yb8YF-mNrQVthz;XQ}T* zA-jdLaHB~UiK4wmd*_?igOZIKTEH(lbnffDA5jxiYpGO^1iDOJDoKX!`B#E{J}^@4 zJXV~`b3&zxB4|hr@b}YqRMPIxoSrP9#+b*6?UyN_SeMe@1@Gk68KN|&b{y73{S7EF z*Ntcu#MWT?)IiS2Lol5=1?V*HE(}fD6qg^x3|mtzW5*$0>SI2RRyf%3o@sXk+_a7zoSqw*dmUef_kMY)v)I%2U+ zfMUHmHs7Ec!9R$6)Ix2*@CwO+xO}(0``c0U+7^8S>angIQ2R`!=x?kaHfO;|k_YPW z{0b;c@!mcI(q)?qP3xy+l+lS~GZP%!)=?Wh&7F32?`r~fUix_8KzB0Sz4)eb35PUP z_;m~}DenKeXV^0{Q&@?%6dkk*j|Z-bogy8Uwp8Fc*E$kKDAvPw%0D5iZugJ&iz-m+ zM8Y&cyzjXi5Qto((xbmj7|3GY(A|+cfO}SG2w&?#lek(%0PJCkrNUeF<8e? zAHf`eLNB7!zf<0k-HQle{THPSHm=?aOf+aCM}y~%Wl1xCe$!iAri28X3G5hRY{0vY-Q4FWSW)fXwAGhC~qJo{zx)RD5#kRWARdbf7L{{+1nVnpYHw+~d7P#hdt!rQ@sc=G1J{1QWgQVSytSR2f5S=}cK4Fe9z{mUw*J z70_e^)`q^tsW=ANT2h#D-rosw;*36mW(^A**>+?9Xu@uC-M`0$taYh3#{LL2M3W}1 zU|R5b8>Zjzx_-o_zb;z@w_(fmeY1LXOp+4qZ6ZQIU?{o&{n^~HfAp(t1cuQ-N+jXY zljnKKnu1XD5;I~$n!Qdf+lk*kx-!*v%S-uc@D4Wu^@jNm2dz3_o7;l#A~|qW}?(Km=kTvpx5mYx`5i#_ko)(u0!ZX(e)E@E6!MOtxD7i(Jt@x2GP&OT}zw%EUNQ?)TRJ80zF9$yWQUgm^D; zXY3PRALu1>*^(PIDX}<5`y&<#2BS4*BRyt_m9WQSXp++RKj8g}9c^?86l*jZDxl6y zZ%d@m)AfEO6QQMpN@n3%$w34;-o{Ph%$k@(q&8s&>1rA2qZ{>bP*~$O&jvbPj;v~f zVBC551oN@Du%81eAl`ai%=!HmeHS zP%c>gMf9aUO--$dCpj4go}$KoUG^lFz45&^e^Cx0QPIp$_BxDu1d~#u7&$y;afVYN zA}PFl$F6L}dDwV8T2c=x(bs<4gLob8o`AUP$(2#oo*|&u#54*zQ;)0@725aeM9^Rv zp@@&pdWLB2O!E@`G4zP3$BeJj>Ah!2QP#c5hw3&~fW~G)2&x8?FCn4a(9vSfIxc0r zXN+D`8z$-?!ZL?P1oIHq?W*gg_jBe~GUvT!HoCpe>tmJif}@3DWkz|5rN2!oO)gR? 
z>Dku$uJfJ?YyPfoq?Xuo3YAy9;jA5V=21EutZ3jQC8ws~Z^{6U_T}hO-(~lX2g zal9Bv5E*vti1)qU;thf)@?b`oZ>91w;(|ZJh+!2vP9Qa57Op0<*IM9fNCn$Eu+nc+ zRM`jO&TEuKZzpacka<|eL%KmFuu&}GAnv*L#Cgo!*&VQ4(7sKv?9}9W3}6q3Th3|n z*Ied_-u9u^aTSl){pr&NvP2}OMYZC+)OjfWlZNkq=qW7&pl2*?>Rt+YmYL+&&l?pN zw{g@;A@pqo7Z(@Y=8Za4Wu{D8D8E5s_YuSwP|)AkR{`{u4N#3y*X_BkQ0pOFfj8Y2 z74C{Jtn2%qLrv85$m7tgeKAb1DS4s?XB0541$z%(Z`R_Fc53PgFrj?^LPY|CX~0zw zZASAHgSM!B$aP-Zn)^@FR9{Y-=#}XDf|^L7O5Q@B3?;J0Fg=sns~=JSYtV|-XvbbkJ3YbC)e?u2F17uvU#!1@<26e@)Ap*xR0McEgzsHBsainumT zq~5ZEctu+MC{*7C>3#?kw{vbr7juujw7~HuQwFi|bCcXDwJC7X&Y2Ac&FiB0hiI{+O03txiQ{eH`iz_QnlBpS;C@LsjV3p)8ew zC}VwMst3@4bt)}#yK9TmXXqj#cd(}_W=j#{lfz~1?10|`EjR}r%=ta8pPC}DT zq?brSIq{z7d2!F2^ZUO{CYebl+1IuAcdhkVEpg*<&u}RP`MbJQaA)YA+idH>f>=8R zJ;aSQmkf~~KO2s0IK8*7ALPgxk$#4`w54=y!bAQB>4Cwj!#aQlmXKH?$Ehr=?_Dlw zqO;8`!;;>>PW4Ss>@mTcfU%#@bXJ!!+yV^@K=jZ?uv8fYlJGq&Xy_+(;#n<1 z5p8U`nsKkH&Z~r{WvEJ-0+G|%Gk_nvVO~f8>mlH8v&rg~_;lUumY>2I$F2TGPzk58 zvwToW&Um9Zrxp=#_BRxpnc*7HaZ#Y798oxbKDQc1aDi%3jc8F@A2ThcYWmg#mhJ?z zbndGjx|S0m>)Fq)>mjnIT^YY@m%s=^#pRwCs6jN6AZ(9SGiBT`B|RfQt>dHA$Ca-# z$P1bzNs=9`0iGl>NSdSVFfW(_6p?_evH2)_Hm*FnmXw~3%5c8_^=4VXaO2NH9T=yy zC4Kv5({XcN4c?nB{q-@!t2vpB?M{k6E{Dd7+bG#y-`o18#_k4R4gd!7QZ)Uu7vMGG zED5OT(s&9PV9?dyGz`+)yMBG|rPTj$C`JBrDAB&@2uyT}ALYRCT;uGH<}MOo%zL^T zFrq>DPol*3hU?U)?31Qlrc>$SE0-ZOW2KrsN_sj$n~as;IRlDAU_SvUMP%^37=Wa2 z4arY4tA%qWdB4X^M9UGuRgfXt;t{t^ZRs!>MTJ-Cvx)VUkIzP-A=*u)^cd}!)W|yy z;4D?tS@n6v$dbWE_9D?dLdypjLW!r`B?|0VTy=e~T=f+0()w}K$}0AG74;3PehX3) zV&lAQd4PG$SPZq)@5UVFBDZOG>+8WHgkmmaP$s*lp?pxg};O~tpJt}cF z-zGOg!Da4hWV#6BLW0cGKP1q)I~Z<0ej470^lpY5K(T>Zj)DQ_9$81*m7y;GR*4>BuF(Tw^aHr?C5&;RFsv zOp)`GiMD8WMvuorSO?dNl-;pBx(-hcsxME6D!9>ChFd=*Ig6N2rKsnz^zg+WuDeU! zm>nuoeakv~oNi_XaKl_-{GU1VQK^QgWljMSd-fC4SY6)rxYaEOe#0$s`H^aNZOJ{O z68%8$%7Po;>_oVK&?*h25KnWM5CI4x*Fz*>I|M-!=MjEhYbNO(QRY57e|dCYT2F>2 z5GGN~zpQ?sq+Z1tfI?L>!D$jIC-UUopR+fXWt7*o^SJ@%saPw1WLe<7L-#|P2G|UME+0p1fz2L@wzX6*-HB-&3Qv=Ob`ndSmVhBvnm(B;2-$;a-dr6p5HS@lQ4!b=LZKr_Eo(Gm3a=~(2qd`r#8>~yjf3iZk^?^vg+(Zg~ zb>{FcqigKtTGw|GO)%a1=(Zg2OHqp}M8FWGRnhUJzPN*$+@S1uw`D_C=|rGl!-dra zNE?yvS{d`B|UGhF1I)!vA3v> zoAMYUHRZulOXI)ZNeRft%k3}&gg|k}l#bkG0S-H% zHrpQPkD4h@7L>%zw1G^)G>W8)GVF_&m;sx!2q~(R09f7#Jy8#}yM93eKXKizUV}^F~qjm5En4W0LB(aLEN#roxAB~8Z za$xc&B_#Xv-XKX<$G957Fmu31(&K8aaB!FU?5%_FA1m-3P`tSo?z0DQPUn?GNLNozo`qkq#>ArzUo~O} z+P0Kb?=EXWHEnpU0;?jC2H~^5x<4qh5}ekd6zgh5Gr!s>D29r#uezCo6oeaE9B-3v zmpkun8s4nXx*=73%@#;rAo&`hSrMw=pVSVjN3fQ z<#vdRcR_}UVHc`_D*_Wh?`7Y*`J(vr3ToH2tca(0I)hXxWJQAur*guD+m-_LJ%G@M z%Q~Ff)3V+UQWkYj=`yPzRxS#4Mo^)U%wws_G=jUCX8j_hTdnPGm!3tQxkt(5lFgws zQ1=0=q|s)^P8yRD;w?Tx2ZGgdK!uoryixT5^^!kFfJt{>aEN&Kngmf?Zic-q*~bM@ zDrC}n)Av&+%JXf!r>d*C3cuJN6$GQ=uxozb^&3*?FE09>%AxlW29=c7juWh%L$||{ zOZSZ}hBNR|9z|(4&Q?5Tqi_{&J2TDwh{1o#ZITSpzdWkivkhGLX83vgjd8-0v!spVD5jf-vss`b?>G<`g2i1zq7}pJc1zzW=Jj=t281 z-#V2PewQ~X3I3(*wmV3*36uIW7)w2Uz;ejc0-R~%CRyZ`swG?|jAB}K_0N4&U{@5~ ztO`VaXLU8elG|TS*KK4GxLy=$h3JIu%aWx8GLj&gM>c0Oo{*L-jixc2&2p{coCbtQ z0VgQJ4szz%-?cqA2(LfGlnN(K1r{BW;msCiC&SGhJ21(_UX@W7S~y zWDzTu>eiDl`$+f{d+?PT36kDYKL)~v;rbPPvmM+#9zSR99v#glGpibPZw+cHHkaOs zyuBK`oyUdx9Rsl!u&$3JGE7LEL7UChfMYX-qH|>M{(nY1Tzmg{oW5vY?SndRr7g#` zE^T61fp3d-vb}P7X3W5&@7HyFZA}|p_A@?UkXFTWOpqom6Y{+UwwqK+W42@7Mnx9v z#_wBCqm$U+s!}n_L{R=~f2OG~hd)T19cISV3%;;D$0jowbcmo}Cx;gPt!|ztV_d6W zGG5h@Dx=AK8;WQp4%-pWGEhA|)aRRY5*zZEil1I}w3+=yC)1_OUT6z)e&%uzMeZN^ zJ{K9PlznKPS&749CUZmRxXWn>Q@QKu&XYSS62!@7U%jNzrUFcNJ1HYZp0L{Y&jXU< z9a*ac4@J{M2=*xf-6sd<=1r*FVTUrG z#z|o&p}wJF2{E70Bf!v1tmYp8%RwmD`&P!1cjCorHQsT#@e3BN{|DLy|^(UoiunJaW!h_d#p`~rI&-La2`}FZ=_Xq$U 
z$GWvYC9f6J$*l$Ti$BzW-3cBdrguVjwjL<_^ndo1&14?dj(?QtY)krno=VFdniFa` zl$jihP^;eDqEm3jFVVf7){#$26Ti>=-Ry%Y@)#0&KU8+u(R=jI)9o1jdD%^xUxY{e z@^1)a374TX3sR$LqTW=IHILr?eusHxqwx&-ARn)coF7>8p^Pkp6d+LXM<(HoR^-BS zv0e4ZpXqPXD;HPq+1|Wf_47Xz;Yk|(8=<99+Xam)AAi9Gu;P_m!Bcoij4Dz)v}5t~ zY=-@O)As?u+KF9ld&^p7B%F<;;T&R{MtqG63k_RV-2KEp@$f;o|C>`b9(BIrqxzxJ z65r=|IShX$(p)~VL$oj)<6-bfedL8o(S-os$~?l0CdLWsa+8!HqjyN1aSE?w#npkUZw>n z=nln(4`KjWKT0&R?%rT!O9RuSaEstWkoY|RmN2!SbbnDc0~mCc6OH@ z$bnZmk~W~i<}P1tW@hsh5Aqpl=KY=G!^Vyw=+`wr+K$Y6pXRjvKC~3*I3`<7Qw%xX zJnn(6+!}{ea#e59bpN&#H(yi7UeAx6%$r^S z8Ls@TViAp-9AH9>Yvc_wT&^YSgc`5^ZF=_NpP61ve;^fWZcsRMdTq4V((UY9L37h7W;@Vcv<=R(d}lWXpN zP3=-1MQqiRPVSvq3f`B&Cw=_W=G3e_1zA15)W|*!Z*C^q9BPh1PB@EaRJ@EWG|El;fI)shfZ5~Q_1tApf%aw|1 zwVA{3NRY20vZ37!m}aeeg`^EWnbJLFP0vpG1Ys4n%@4i^)i)y(GLdaW`k5y>+kQ~5 zdkxL`uT-#CX&~lxsZpKf;<>$$LL~i}R#DU|bI3TXcx)-vKio@* zvtUOeLm_*;>z^ zs)pa-4ad$BGdjb4UY&+53f6w#1SKo-j zg@t+Jtpbs#^kC6a8kx^4FI^7?>%U_g3g&90)5Mi@e*`)=0=*qft#X{y$yO-BIs%v- zkbby2vNI0S&Sq4@E0$M$zdTlxSzEQc{Rq$QwkWmgz3hKG`{z*~`jLWRbN$;KrO?Hv+I^0t^j{sX7g6s`ntxr3RLE%nh{(h!)3v#}UgLu!kKhMXyrdvPpZm0@tT`SE>QX zw(QbImPL)u4#bS%+IjXmi zeerBMxXdip)bj~`qoO7D^sAFLxQXQU1!~T$Sg*BV2maNGQ$P4hx|r>p;`>AxZinL1 z51{aia##51lgc_dhgN1m4E5C0O4Wv_@v2;8!2BPksaCtSISmAJ zbo__T-Ws$~Q^So2msrOM?pAcE;IPp)`_ggBl%3;YejE1|df_8xf@BPz<4IrUWOxqM z7Kz3Qx~c8si~Y&XeC_PF{iAl-_X0${ek4_UYY9YmcnPtC^cz{ zKTbE)KekxKh_fn-&B+uB@la(%*Dn3{g?^5}C;)kJUz7B}ZK>;IQuTy07vCa4g19Ih z6Q0{slXyx%$U-!kns44B6~5~awg2nOScVGV zriS@g6QQ?0R2#TaXP}Kkpha8z;8Snn|LOAE4?ESp+>depApAD^ut2AHqMmFUOYqq* zKZ4U<8Q?~H+Vrhx57XJwf)a2e*3L@aoyKAoJZRW)fb$wk<%pMw@zIS z?R82KwtI98>CJxe{T@WnR4lG)A-MXevx7nEn*Ct*IcA20be`IwvfKG}?nG4MS1?gd zTZSjUBZ4kkCNI1_`3qS{OC}d)5+AF+E*uv(x z__Q%L#};OX!V{8%{H3h`od#5QT8#33gMuz>t$)C2d=IkHg+pe;;O^UtI4F@t$-D4( zjHE2`LC@p#4yH_e)VhA)sut$^qFKOWS+c^@1&}uJ_me-{l83dRT5yBvc8n8iOrSYS zd*)rfQY8Zchm6i8oc{F(M|$__d@Liw(r^D-%QtxB4M&*d|4`aq{M!sgR;*-dpfC)r zx3BUeLoz7lORF3P789Nyy6w(nFT9ffz)jzj;3Zse+rE$6+E|&X$zTSIOGnhS_%s26t1b!|4qrilI6Yj|UIqwwif=5x)dWP}+LXs0JW)v}W>6-AuT@z*11p-3AE zEG)MpUJD}E;6(aDErUNZ#A7keWmaski#6N?slo#3;}0_fg312y7+9sxO65Ye$XEDQ z^H^UEm3z>T)@F~#MBw?U`8-_9pdB!Xsc`Owy+3Uek9)-6xT<`{UtvWX!(c3*U94-l zemJz%4E@6DV(;8JtXIcTJj*UI$iRNgNqx>kAZf&8&JWonJAfYrpBadrR^(q`cLoQp zNC)`jXI`!I1eF?DIGkhTWI(Yo9r)N*$YXERCb6O0QdJRCu}?^;f<#$Ly37*^ zPB*?YjF-QI+81sx+6skAe-_Pn1!48V6B|xURpFpwFZrR=m(#4DiaQ=O%qn_A>Nj(Y z8KC=wAgdz~N6uePQ6nxZ#IxJS(9&kB6S9@&RJolj!}*>^0AR@GOlPY+K9Z`YfkgXd zt$An4*-T)LbQ*xG1(0O=<6>i0e@8V0@lj#oT59{`G?lyWJ3-AaB1=%)ploSeG&UuD zsy_XttQLH%h3>Z@`YrLkllGXju(d^L>y|dn`Mq_cn*lmr!AcFd1TgtkTeA-R{n_x% z>9$|jWy%M`0*tvqoHci>dE*wCnXLVTSaWK!FkIp7*=MreP=5_nId7*nl|eKd%0aP_ z`+?au9#vV{{&igad{0IbJjBCBgc=77*VP?z2z}KT5&lh?N__K`k&k+K zF0%1MHiaaABb+Lc0g9Th<#tclUmj*FRz`2&xZBRlt-e%$_$vx9oHdd-7JFT1*3kb- z%%&S~0vbPPqdwSc){biUIcs^Xq+uQjD-ii1MM_f_{F9c^!&7u1Yq%L`Xs&dMX{3 zei+(z*flAE;5F3~2S|6V$RCLsGYJwqfOY+DHHe5(VBr>LngSPrNGwK0_Q{6H3P)~K zor_w`jTc|UN38Y`gxxop zBQhlKC(G0S)?H6jKD-AmIWWW^zuXMVaz_@Q`{t@e%Pp1`TZDYsQuwB&(8&XZQZ^kl ztiHG(diriCnugiW@6O92s`3wSqSRc(@O0SlHSjqnXO;xr+7dX*o_K1CSZp7{C#6af z`XW;4Hn%3ZB69oU@q}K!y%N8U%B(`KJk!t%lM{$FVuWssLTy-l+qL6JUO}}^i-E}v z_=TZheCD)gaN2Y{yA4KXMMv39Z@K}XE6vdmEbaNZ=C~&3%TCq6>5L(DUz*6gLlQ+x zyb>=3w#-q71M)X4(#6tdfarn{^Y{Xa&Q=KME%5{BYX*B$=Og_ehdhPEI~@$|#O9=*E%vw8+Xz3RNfKpQ zy4X)PtDmfN4j>Phawd~FY2<72ai`AAxB z=kXlHf8q6Hx|v(8nYwOu@}WWI4Ag)US zCr8J}+v0>@@$iB_D;@HIUv^OlJ?ftS&Uzk=@cl#GaLwudH%-WHLm}<@^>s$ed zXDQbTUR%uP)hsb~Mf{QQbn2=ppR(H5?{7SJ^oyxQ#280DYww;q!dfe_x?ELL%BvN) z4x)So3gFn{dPcmv7Q76Xm{=b;=U?X7M4G~5u&L6fc@2tOVYFBbYj 
z`mJr``zmyhu&rFvU;n_Wco*lZd?idLtiQ>}3)hQGWqMKpD}6huuqhVArfu66xx-X$ z%rP5S5|pSn@N)A#9pT-pUnCoU-c^u~_{eY?QAq@oIyQ-`$(W_0fEygn1D*zt3R=d? zL2gB0`x|cp`_m8^vQXz=`K)GC_olL%+0E#jEDy!l;RS!q1K%zg)-IpjWDv! zkjv^PD$k*~T7*Nyuf4eL3+UBz)_dE)SrG+Bx)OF@i!;^3rQA8co*opPKMT%9pC#>X ziBh!pko0-ExtI4$8$BTHwn7$EBT(>kPwckIA+XPPra0^~C7^T3FRtZ#O?IH~>%Y2* zQ{1nkarw;J=U{xtq4lBwXL1gH=p%G$XZlG#X8gOUCd|F0>~Ls_aU{D8f-#3`yGU)i z;Cc(HDCB`Srxpv&^!?F2nnlvdp`vCzbK>b7JOeCcP|y4) zBwixXGc!YF@oct^D$%X_FE8yt;gv=xOO>qAfO6H3G?Z)CK-p;I56jiaud$!lb8FO88hf1d8u5N8mRW09b*u(&Bg8_*QCmt-fQ{)X92k9&FKXS@*i&L zNz&h9-S0z%W7);`ca;}UTxuGc8WRqMsIBXK-1xaTgt0-1^X3IE2=dbMIMGWKK5gw+lIk@ROALFwRD7Y08R%-Jrd5__2%%D*D{Poqf%+^Vt zQ4_1j+)B!Un8vz{Q0mR@xOAf6E0i_ws!{HxQkB*FbN;7{vM=ZHb-^da-J`K4>WY%6(W2d-e4r$++I-Hv0~A|$0Y02RtaRN zUT#C%pQY}3p`WgMX7}rqxxf#mUwd4a85~GATE2C!x^W7E6^Q|6_^+Qw8nH=N`yrcv z)jgHUc&{w_nH-}JruPxk8HSE+dTP!=Z`U3drYp3VH_Afy16@6-`ix34q6C-(SwLel z!_)6ayxMI-+a-LEQ{U?20ojHMi zvyPx^*WE(5*n+09fX7VwjUp^rAOAn;g#U)m!dAG`matWiE&=%qNe&oSvrcDp^bT-C z(X_tl&Kyzt_okPuERi1?h}TAQ0r%gx%&X%{#}RUJ?$Iaer3!(Y9+fM)F-9I+ix^Az z#c$q@$*jHd?QVBVrdEcdCc6?4%Ew%XQ}l2vT*v*o~!pPTFk1FaGp zt$Xu8GEsmBfmmW((R3Q?5iP#adFFU{&^X}hhI*X1KHnKco}(G;;u;5j{^;{ zrYoPa0%h8xsv9p9u6@118yx7RK^1(LXuz+wHyqbc@JGxnCDJ@SQuwH7Jl5Xce&gr! zx5LRI-ber19ycC2zb7ERQ=^?J4NrPra5~n0$;USZ>XJ}wQI4Qmz|9*6ERr4C{LX*4 zEH%?=)Fq087pFrcf-{3J&cnC=O8`*}`JjsXAGR}RcAzUA-RC$$ord;3!_19PZnw@n z$P9@!I@glFTGmz)&*v(k2*RYIR@X_@vr2fVJ8bw8Ex?{h=S|7ctYsH$p{3$r`S%?X zb`!0Qx^L{;swD?0zU*(zM;|{B-`~;quFNu$z*Nk!;t~m}hZexw3dv(o;2Zgv9r5<4 z!&sT(U=LX))oQo%pYn4C`#p{8RYWxdiN#lz`=mU{h$joPcn0kzCU3D`aohaWi^SAA zFm;8-PYYo|1y_0}+WY-$WW%*TeMb4uhvfsTK#RxM25M)fV*(t3F&(q<96BTC2R-xK z=P_#Gc+k9esbLDv4~?0)xDd2uuG^y;=^ZuG7X(8TW&&xP5M)^Ry_(nLgnQ5EdEbuNfe^%v45RR+3YFbMC5<1JOBlEA z%!yIZ4$N~b;rPDFm<;o|j``SM1FbhJ+lyncVK~}}ds6%C{&|;GK2y60tlItXJl4Of zj$C!G0$3dz$kXn#A?+<3WLbUf>2UrLLJV4)3*xIvG-mKEv#@wYF*} zJnx#0;%}sbqk}pA#|5+NXYx9V?-->b8HV{qO}rW2@$9Qk`!B6F##Xoc^(Snu~! z;`=K!Ug9qscl@ou`(vQC>chLimzP=jbJ?K1yv=}2hs5d`5i8ZXOrz24lJfxm^SSn| z3NI)0c166WAECOSLycE|XEop)SOXp_F)myvi0f|bg z0_KHE)a(xnHW-yY9RSh}fF?HKQ95a4a^wy5M221f!v^OpI|$)B{NIFj4BvHEym4?2 zNO)X9RkO7jo3hNQUO*-3#n0}tNe(67s-H_mao_r>j7$s7zW}Bh&T>gW+P-J^CrL)^ zg$8XOcln@Tr<606%G)xB)$U_3kqcgs@o)K9`8hcYD{+``OF?6MW%J_Ec0jLZ;_)`} zy1V)1rseo`oOYzi$T^&fdV+9BLqj72p#@(g?Ah?3d)ShqT%$cVEMDx;X3R#;T`HX) z`?XXuf$~3qp)~Y={r>q$P&3b_Uu)tW;+LGe`LNLc)fRA-?ikVrEL{Ay)L*Wg$9g$qD7J zs3b|e9uQzHm$rYs2~vcy$(inQZUjNoGeBwEcXG^a&7&TtJpy{?SQm7^5=b_ooG#aL ze;{q~M-v`SBo@nB{zD5P@Mrv+2FVE_QKF7Ee0xKe{_k)XW${h;`jCKYF1kL#WiQ&2 zqJ0?rv7YZpy4|2HYKJ=-w+uiMRCc`i9-*t+dR%9;MC*!x^HW^EPVoQGppybTXT zs_^auNq%;@srR(i%66lm-@?tmf(;v4dmzSdUGiMS6$%4{Im^V0FWo@n=(VSaJITRD zc#RUjG0m4ZPT#5neutz4Unu;N58e!Q572F)y0uI;*9hEp6v!a|&tJUh#lJw>i?we1 zZR5@l(k;<{YiI+4x`bgbqKJOJ>?)k4Z5y3qy8hL1!<((vxO)IwUC=b*{u!Pk z!_QSC&dl@O4f|jAZcoeWALLo~Vd-YJ^Qvxz884KuWcGK0v@$Mte*Ma#@FM%-D_R8E z7VziSFFmMk8P(iXYkKsb&^pc{1%QhUkGUJs)s6i~)h5G-%SxdIVdVia0(xD?g4AYPO29}EQ8(eC2IxKE6@1tUR&k@!9 zD;Ckwb3suz>pSk5>y?GcL*+4P4l5z^YPUzhf!?j`o=%D)`O<%nrw;-${icOCR_1c! 
zwN(&qfs@Urb1z1*L30_HArf(%$uoShbSBAcF4^ob9CkC349V;3{^S>8d+PVD(~l#4 z>KTm|TP1aYicPu*Q{XJh^}h~(q~rk||BioH@oXEhTRZX$Sn1e23USq;`zG%^bHQ2; zyEhD8|FhWcf7q((cTvq2X{T(7I9)LcMEZ!avDcF-{q+6l-~98~H~Ok>u3x@<@B<<@ zx8+6^wp!W*gl|qh7MRkZE6s9Fyy*=S)yl`N5)nH}=1GMTc3q2tXiuOXy6B&sBx~hy}B~ z*o3wZAQ?59O}=cGo15)cY&`6;&aWwaC*?N(`t&AyLnYHx^dfk=`S#H)G7;ZaJn%Mc zPG76-SlqYPosP(1+jO=)W5`@c^mJ|yE~nsK)zAa~A0Oh=zYig~#eepcZf(aS0djJ_*Wtd~cCkNKk{nSEX4si+6rWHLM+cZVe=8JRK1X8d^p!u8EA zP}2GQ+%t|4uD=ltyv-mte4S=w_}YvMwI>FG)ifxM|dEC4|W=nEq2@V&VeMv{O-EKmDVw?oBbJ5D=Jp%neXupeuaZ+uQ54hZ~}o z)jZNlGr{HOg`PZ_kTuk{kHKN4Ra8`x5)0-K9X1hOBE~I8;;>1-W52E-Ruv0P0(U}skrm}cqaOmaAzx#bnL&B z!FCQD`{3d_rGJ<`d?g_2HN6BuzEgmVCFoR|9JwX(7$(A)yjBwl9LIR+Gz3APun7o8 z7O9<$`**zAxANgdoZ#S$b}Fen_|y86bVAgB8w9JnrR%kRj6xf@2=u`_P9PVZ8LM5h$`e19wxVUP zy?fqlMiyskObB^#-47hxmDnelbth1U+V@|23-1(mv@drbhcj6OZ?}CK3J4xGo=rBu23?(>^Oz=A306k} z2fYre>g@%Go!3=!SgK_Jj}BY8dobr(ZsUUF&7KzZBbdeJxUm@y_@52A=ZRMKSiB&! zMnkD1?;j;YrVScD)NQ9K)tIb!eTT^z^tOK+ z;Fo{r{U!J+ma=3i5w~|L2?_l+JnFZ5yL{;YoH#i-aXwsb^Ez5p#JmAtW6_Pc#t@mf zqK@#(%S*4b9X$jLl)V3M+%1{df@|+}p0BZS0L~Ns1Nbt(AO|9A>yC)_-M#}tmj-G%fm&vcg!zIxHw6CSnC8N8~)dRoEkCthH%XS^p{VIP5?A%OzeHL-p!Vx=Kl7emZ^4&Yx%+vbY zzVpV~gtM`vY3OyeU?cEU11viZMi*(Y$O+A`U%w$h&0tY@F71a^fh%sZ{W9C0kry*e zr*oL6H*GB$6v*u;mAlg!Q!KbK6t7jxTzga&cdy|h+r(*=OBF-Z-9L-3ww0oH@g;XZ@1NKGRM0VS3fLw~DVXj7`=u9MpiEtypWn z&AUTt^q%Yu*=%F*7g-OTB1*P|J)TPDVisk#5scej>)z{>S6#KQ+MjLAo~{sS^R6)O z?x4POl$7$`z(44W?KLVb4{^QR&@Wc=bNZtOn1Hlv$YML({brR%B@`QQRWgXA$7U6{ zQt*Ef6&5kaIM2Q5K??!ibcKKHIp@LeJUey7eDDA3DVDTPM&($kTw z)ATWWXKPUWC5*ZEVh^Dr;boU*A@gWtu~GfUMP)qloey}mNEQrM0&$pC8zoz!>e|At zhGlf|Tm^9m5tpf=ck|R7Ll1x26@nLE9}GEtZ1tUb2vR(#v;$w9I(A``Mr>`iN`__% z9oX8dtJvnoQ!n1~yuH?SnC3uJZyFlT{YC8|q|lxjhYK?d)l;4ygRdM8Om}m4DDm> z%{#!rA_2ob*?`=?+m=EW+^dh`9_}>YHBJX)qzEP@E!=1CA?3Y1wUVfp3ykkbiu>Lk zD?WA{LmQ{p&(2pUn!^)Z& zK;xK0^4;YhlV$p4l^mV>wGq4hx{j}}j%-Vo+HcK!1qRlNbyImuj8}N1i)-p1lO~oP z(y>jP)atwi4QG~L@_Id5OtKUl+lY~6Xc{XcTS#%#8hEx)qZn#WQ#8#n0W$aeM!e~a zz-ipLJJ(2(u*Kd<)!@Qxu?`0b!nEFTjnzqj7o848=Ba#j?)`$hXP>@HUf;*{?MDBr z@EV10{exg+KKGrpsB>LYLcraNVNNqc(q4ZY2}r2wg)N$0Z+~J+9BvoU!2dzL2$1}D ztZD&%NL~U_FOD`&c;{{kwfkS3?Y!o4-2XC^Yte45s8QlTl6LPy&ufW=4n7zh7tg5ivG_`VB2&-{kemK}H=)g)nlPh7=L4s$s`%xegJ&fjF)C_KubMGD1lki&#Z;vtF z1`ZjE>1XE!#$421z^Y;c?med4SF2~A%c+JcomJr#Ka1_&7!=I zYM*2hkVOn(m8HmcLP+QYMcp+Hz#$* zo<)DZ9b5MISunELZLfnEjshLX?8%58)(re0WgLV)xo5lXx&-d<_xFgM2<~Dzj0vj# zfN+`eVsU=;lvCkhKJ;ONyYOa)dx@(+$4cYq8?}}g4SjV|1N&<6deC9r#g`RRe~#$O zOq^?BnycsL*y=?SznsaH?>VIk_Sjd_&nwJK$==vpAB6BWTF4k122agu$i!k`92zZ5 zS^PC*G=*}Pz6CKo+vOPp61`9ata@?Qxdf0d))H(QL&-+tt(m^~IuP3+cPCKxT6cBo za?@+W2NuneYX8x-6j5aV$zZ)KsxR+YrN87*jP3p~F8}Mi)*aqp(?hHlC;nj>c>v#L zSNoz~Ta@Cg-}zY~erO*@%WLr3H&q*bY}q>h&Fum5UvmPfWTG*mjNAA-x>>CCk+iLDIp;~K3)h^L4ouCewcIofQGcOuSLRY zci{QAt>}*!qF^CL&HE1++*OJDJ?80{@jpH}610c`NeK7UQhj(X_cE&RnpM+P7Tx;; z;=hsZdqG$x)@xvJ1*3>_gV4P(V)owNy0c*DGVy%GR57WL_cv>zLJ=@IIL;B|hHbQA zJHQDx=y3x0#Y5qZG^V0D>d6XLZ*PciBh6EnWn>CWS!Iqg4}1iD#WE>BY&z*;(|)0m z0fL*b9j_t4I?zs2Buqintagj0gp>c+ESnR;dk+pX`-?;a5K-@yl ztaYD}t?^cby?KPHxUrj?^JEA@*C&k zj{%e`3orB-QJdS`0by^%0|x?E-|Wj9rlcKwo)XvoT<--geB1@waHVw-S=EQEj&LD= z3o}QHSGkq(R>GIxELi7?C(5YOzW#E^BXpGlYiH`xRd=6b4KIy+H=>2D2D%%Oji`gz zr`mG^M}&C$k%9p>e<$%iHdaO^- zuh~2g8M4$FlENi)wPF)bHnnQ5MA-h5y{i1^5gKm<+VBngSXY$4J9wi;Xe;hO^w{6{ zd4HNke7JPV+G4|H$#4cK^^)cd8?7$DnQE_@l(_7D~viC{Q)9paYd+sGXW85=FKP7Ge z%~9pMCt-PRJiX}!kcNbI^~UBBaJK}TAX(x+o^f_q*W!jl1=vWF;2e9i$QbQ|Jrm+n zHh+Q2zxXfcKKaodoDyVXLS8ImZo zkE>fPu%takkSB3M5C6#{d=&u}<{gy_nA5idb0nLhmPg33g`JCqYwrRHgN_5e3Y)99 z8Y-sqyI^a4oQnGI7v4K)dSA8o)lBY;zPhB}I6qznC77#}eAEt^@rA>CdMzBb5EfN( 
zz!&!&%{7c!f?HoKb$oVm!w)fY+U6wB2Db=u;X75N-2C7iBIm+GG<+FTCR8DQSi!x@ z8TR7dAUG$IpO)>x*a>9P;`*ATJi~u{O_@`dxsfzhxxWlN9HTKUAIF}+l$uQNNw)dj zJmF(}x zf3Rdzq!dUwc&WvH9&lk{w)e$ZI!U+bb3%qp3ljb^;2`31HgnsHcdQ4vS57UPtgQR_ zeFwRGr4c=~WG?D&e)EAYTh;l^UsspZN=p(OkNQ5FReBd=H~w9$G2H`1*#5g^y~7TU zI?uSj_8YMMI2Qw-Mr8N1DKg`FzyF7@uZ(H}e*aZK>F$QnEg&M@Ej2)Dh_uw`E|JmQ z4I?!5Q>&%Niy@4n^L*g4O$@AFB_U1Q>f>dRc!U)w}U0Dw}K zf&8o^p~FhdvbRh&*p=K_!)e)U&l1KYtwuN2N6xq^I5$62;nLJPt~)E`*mJ1 z9J9=BrN@%VcEFbVhs!P>#d%U~Neh%Sx!h}avX9te$mGo^=?A-sg65fO!(60d@ZHHk zT-!V`c)7HsQ@~(0WFjW8+M2|j>-uy=K*A<(9kxp)Y#VpJKPOS9GdN;}3+$k*)w#QbzXQKpGXEe9E1tQan} zHBQ}%xaBu+VzQ}qglcskV+P{Od z!wCHD*8-W|_b*E9nYpGu=d3vXPHY5>El#8MgAgp zNqy&lXt=@Hl{>S5I&;BzXb{1sChlPAE5MsJYgwj};u3XqyNl;lOvIw((?enNABJuU zq(jm01N{{akmIefY2qwsxS#B0Hti@joMWpyg`&g{!w3#yEy9|Zb;*ASXuXV96h{+l zbgX~z(UrH-jWvZVjHBBR^Y-L>aFq7NTrn%R*|1k==zE+wD4&-(IoE*&aayXG$wZk1P6S zD%SdT?Lye9E*k1joP;SADsyjW)92@QI0HW3x7k>cZ_+_=+0*6fO-B^#Vfj0+MTE#G`y%Xk9oQHO$(>cixWBiY1+Au!j#$Hq)bTJPJcu!@eLA|Q{wOf71Ma~d@ z>7pSo6q7OVC;OYM)#1uSS(q5Cr||!LtNt&8P4jQCcefm`Ry%&_h8ueJbnWOtQJVB6hwVC zZ6w(^k8zi1C6s?Q={zjzeo%Mou5(`^p_GO7l31-!0d2Hz#w>A-Gmm`$8!C6-nCE6X zoSM#QOu;Vz&8QeG;dST7W0ra)P~CWr%k8{b!kZfy(sB;@oVxBPU3VsZg_Y_0H{irI z-s=F9`898-S)CAd~`PtD8&Sqxz)iU*8kM4z> zC1bjilUP}dc#`A6GWHBKe?(G-9fCW5i_dx-SJ}QF^?V!k2;^bauCQm zha^!y-v$Vl5-dK){4Hz zE)}nUW`3TP%*|6AQFvMjUu|hfC-72IlPz^Xj+&ce@_<{@u@yVerZ<{XS&_r(+Yf39jm}yc6B==yu#@$HDUCAJ68su*~ zkFDDCjB^}^neX`iZC&%HZ&ElB0o`pMJjijr;sxMP)|fHHl;nY+Rg>FaT`msmy!qJ- zI(L5JjVJ~A7Ib)1r0j0;lVuWh-1plq%$qhH(!K{!rt?Q2KKbH_)*i~v)>{$6uirFz z)Jzdvq+R$3YzunJXDs=s)SZV#Iba2=G3oI`BC99DUDJEUh{5lD;uuT@d-# zOc69B>rDm~6QySd1#ys1gqH z*^OjJIUN7hQ!Lz^?{`ZL5Q8t50|cy6&_o|fcH1UHe7KL1aE}&;8E#ABer(Y`uT+g8 z{qWqIzmVMH<0)+!FG%h**nRG;YDk``DU!=J`Hdm>~v12JDj^&aT zw_W-!faR+U3K`SXvmJvtH;vau1Sb_4&3c4r)$SlJcBXIfuMY-}CVq)`nuR1Rw-q=x zo0;D_uq?J<)i~hIxdiFhy0jbak#XqhK-Da!=&c?jJuF}CPYLa)H{uoly-#u`d|&ad zwY8@tM&dc#8_jDrHE|o!q`fWSm1OymFCr}^wxxr!mEfSJ#btc3S%q=LqcviQ4Cx6e-VQ6po@F4ywyS{d{&I0c>hP*ESjSkK^Lu#~p0(#0OK=I48m&BYxo~$)q z8h9$=oKkrUiVFzYRHVw zFxrq+Ybh?eN2O}7#2YhPo5}pidY&wH&wxcsW;fJP5C4EPRQ(miOrpzOGa(}Y6)`t}3U!Aa=#ek=2Xg&ADUwF#H!Vvz?28fr|5x(8b^V$S zH0q5{NtqGn3c1Y=bX*Zzf?Jx66l4!9?_s!I zOJaPvh@3VX4!Mm7mb2@FU`MMmCj?~0W|OsOHb~vC%ktT02VJ098#1e#iw(b73LzgJ zhtaL~=L@-lJ2`GISzQa6vx!zJ2(qWD1YDou>E>bAu`tVZof&n;%RZ~xEW{4!?8#{5 zHJwO6*!562twT(C>s&YT-8vw-%g@~RQQ;tyK6V36_>kOn&f$)|`-8`Z12*FCdm44` zL`ip?32slBdTj|+=e3e-otbXW@pkhKKB}KyaEZMlYi#|>w3NJ7&9u7-1WtGfzFxld zty>yiPH(Jb3DC37jXLzL(#S&@m^D{z5fR)Qw<$<#h8#eTq(> z4k0bpd@``RN~LdQ*Hxduf$7RkhQ|VLia-v{=@JbeV;%bc6$tyU#{~^0TBxfd-Tt5S z#GXf>d4wc91+|IF=}|4h)Re;5c`THMfH}`D9Ij86twThW=6^<3U;F-96+f}nKk|?N zq`UrI7;l-*U-(JIRPYx$YEPH94tR%|@}6n$VGqI(MDB6=LRgZ0;`pFSQI_o(6SlPC z*4eW8a7{Mow}oo~R<|qd(QQW26BEd((^LdE0*~wTz~|I$*YG!)h>(NE(kn)JE%2Wh z*P<&t6pDWT`6qQvsWcFetZj&hZYR=+v$5%} zbZI4iE_OX~$PYR!DB()?0AtXWU`u(yIFyo!(_7X*3J-K*4Ci}<^&Dj-TM2h%9><*s zHS#<+ex>4(CKq#CUJ6)22r(EusvRtW)H&Q?;N(3b(Q%QlD9Ld_gEyGGN-PgzCmp36Z=d%L zK1IHnT}?TCHMo2%yIjySwx)6*1qZZ$bkyu?4!C*%si5&}XJO^pI1W3{46B>Aed$py zcD*@C#Ldr@<$~6!tg-iKs@=YY>%k5b^4WXrm?66%kB#$| z2i?DXKBwd*9j5vB$h&Q4j8}iXH0NseRBawQh+P|P7T~8bEed`Ui7tkp+(cXYo1e_T zjp;?{aL)CUUMlM(5Ho+MPS7Qxq>E&Tc2|;1F>{>hhS0YtH~ST*2)vq_7j4gaPSv6W z`y|Q4IxrNa0 zT_Duq8<3LOM6?3iF63|km&7=+IM9vq$auxBt9DJwV6^From`zxThmU&rv=%I`e6N^ z6e;{+;d6@)+O;O{ERIjmKjI#A9Y4HWDJyb!oB!#h=CzbC#A$LxbjTki31brQFg33}~>u}B-G7fbvldgj8ee0nYwGG_c>7Z9JPpa7T zoK+jygaEWYK{=6lM*}ZlJX#1zWX+h;v$MfC$pADGjz}S3NNqTgaX76KHoGGCV67F$ zy0dfd9|33z!nFvRf7Z0Z^_9|`lSzxQgu;swb(;QAT4RsO;7hVy;dnPS5e)o&WdFUu 
z_4Zza?IQP)vn3i9lq*fzga65%2=#_;y!!yorjO_qBOn$e*qYK;<*9ALg2sPqqc}HZ zGvb!QvOn2`o;eeq47{uhho$GqR3jM_4KD8sDX@xi!rYh zC(Zo>HO36%1IU5ndk4~h>bZw%^8R& z!WOV|so5rVh0Wv2)96bo52uM7&V<~R0r9y^q_iqhCtK(b>Dt9It>Ep%WBI(eyecxR z(GCxz{{={rfr(<4B&xQQNN$8F^+?v>Jn^w9`#gfoAOs+~(oJ891tm?g^+9G>$@nsS z!dcf^ZGHIu6${P4L21h?l+Gy}MB}bda&I?qeq5VMlG24TIL>`rWUVbQtVSS8G_nt4O|MsJjOcja+F zKFu@<7SkO`J0hS{%x`XN;iz-0Hg-~1u5n0IVC;tEkfKx{@@$r2{5i@Ev(ULpP$Tn6^<`&Ein*w zxxMfvO%c$NMuzwez$h7PBQ1BOAD;I*2zxmnd2)+!M5_J%Amq7Eih!iy?=&l*2;cYo zO*(xM0HVY-;K3M6luQ+{k;Xq+hg5%9gR)(;nP<#nSY|Gry%?_QUU)_I#qL#A>)Z;T z7Z`{9n?qDT?hw-{>BH=>ZrD(G(X0_`W)x;Jkpy+>#o$h1uMaSfM|DpG(Z{zR`gaLrrXG!7|4Xz6$I%Z%wF5!I%$@cqMF8UJrtx0C77k_5a)v zCW0yWzD*+|)y?8%?p&!^$^7E3-gJCd^8E&H%=2vLAsb;Q@F0NI%#{Q=;35X| z7_V%N`cRj&fjAznB9SWa@*Sm;s)DDb5eK!tLs!Dvd%A|_V2JD4M8WciIzZ8?4?r8dyPk8Y3*94KkXU`7(FN}8NU_wfQ6+4<&ebJoD$4-?c zJ@|CX=;dbzTRXv^_ft_43mHCTgJOj*8R>7pdO!)F@8iu}mp6`RA3y4x_$+lkt@q0B zbWCgTEBEt1Cq!gGyXdIsS#4+2!VXMZL!BRi!J?xJZAi>}E)8V&?QL$tdkItLw#&T= zcdM};qWUcDNapYhxvT?)pfkMQS=3n^x8&@_d2U78aJfK~eRvwo`Rn5BZgG#IF+Vi48@o20;l1dzJ6T3c_y}|ZP zpgW7>rI;IX`^#THG{B#b&wuTI1abYPn^MjNiEx&#GjbrS4i9D07e>0cn{|74{ju1l zh-PaE`Ke0%^$`HtmHyHtxilj?tvPYSd>Pp-4)PQ%QQ`zZ^s%!2-$Bu6;^%%mAnAO@ zra7j)lFe_6FI8AI<(*4lJ^(!>fAV~$iu6#@PMC(O06D6}t`&Wipmy66a@pYy z$C8^4PmSU&6Sc!ZZQt#Zb$|%^O)5g{%h&vOt+HtL_>yZanC>PM8~RZ>c8mgF>na)p zfg!*u`Us=NNHjOXi7eH$Z5Xc<2E5~``~|=PSV>T>^SG;W8r^MxdKh(4n(|Ey80lM* z>b%yXqvw#BE?6|iZP>E}7wypyFtdO8Ky=dj?Kyz)Ua~$SdiVpciEw(V<4AKZb74l* zdtT@`XQ%BKrh`b@9?-1p=Bflti2SKPX4NQh@2^yO&HH?!wFtDMMA)`df2-ulk3iai zKr*{G`zoU2OBUFU^*bsPX;@>x0_{pbGsP_v^6gI-3CnsZc_*k_O^`NHYtQVj5Crjd zR{mHbdBnYg%b;}f*`HLP7q;8l^7om_^F2cf z8s8hxCw|7RzD{QHc0GPJ zX7-gI=VcyT*Or}2(Lx5teuoC}1IKLn_dbScuBKn#hu#u6VWAS9->sGTI?WZklCG5< zgNVT)4`+p%f}1GOQbaD<_6>J^Jm_2P+wXKU<9i_Hi{`Nxs5vG6qeZ4mhUVBsuBSl~Ol49E6N~kidmd~&ya6#4=bxO67ElJ2=E}i{h1Hb7)OAv8>}Z>BErrz10dwg< z=?{U+MAd?2Wxs=#&L9XHvJZ}ep5+QTAMOk|e>+r7x>jcGs)|yl1O#FC9D0!10<)u? zQmguTey2casjxZvhWsC!gvyy{u=3j%85-7>D4uTCJ4iq+#LV&DouN;Y`fJj zO4gX5p~c!QJoeXxf~8FK-?XB4_aQC6-Fq~x^AC z(qlQv2!z#5+2pks&US{7VOZ?mJuSbb-{ns8P8^txOZ zwuWEbScEU{KZ$@2+^YBeR~Eo$ zR=Pyoz6|V7I{ZSVt_V%1T8{$PA8ghsuEm8k;>L-Vnpt$R)Le!i(RGiO1i2`i?V9<@ zR?z>t6&RTUxw+UgLCL66dU8{diT;Wq<5$>o@lb`7D?6|N1R;(E-Ga&80n}k~abhIT zobHg2x%8A_*3Yg1@~rV`cCl{VSI_HY&f$z!hghMUa~YIO(U-DmYF^$GqVBaabt1WI zIl3oTC1~kAhl!l8aNU@%sRZZy@RXak_fdC?iMiumqb|f76hp5mW(>5~+jsAq z!|e{^Gnq$ywQQy1HPlevRhXPZK#%6WbdH@qzj}U8e1wvbcn^L!V!w;48Kkclys;$#_ZS8M6|-a4p05SQ zT}CWB%gCN^eHpEvNT21&sn7cKZtd-;@FebB>BJgvk{JBAw3I^Hgz(--w&7}d-On_d zq%q}w2ETa?S}@ea)f<*pJwAJ2#YUV#Q*4RGExqyV;=7L$Y>AcwS;BO_JL$Vo2T-y- zM3I!iXSpk{GgF>DgE4Kj9q*PGwjjW5wFzG|omEj|KH&&YoA$AJn*oY&*y-KhJup)r zj&A~~kLQSt+$SmVv)Q5af5Csg2r70Jqpz`V%H#Zp(f|3E2n%95W!gkb(;$jg0 zPz`4n4JxJjVPlaVYOu#forGyYcf4Z>=-VK)5A5u$)6?t8*lqOU%k&XfOrz_jf6tCa zQdv=@`#H*WACs7QE_5uqa)!6a|Bti3Ld|21$G4!M3?8}4l8g7qC6CXG$%-^xi-rdx z@b&#+=2Eo)ygSqp(tLdT=dvV4x2A&6T@&+!wKS5<#lbXk@M&LkDk@J#`2nh%(h+TG zji;V}k`%3FJRo&m-6H+zjUOW=n2qY|$cO!hw$p>BSWJCC7oRi3A+Mcd1*c_h^8*7; zFvH)IBs%Ya)ZMi)A_BobZMzBb!y}Nck;7PGoQ-Zf7(st^Lx}k4T&-;lAKx&eoj3NaS%Exie=5QdVH&qJd^5o{p zis?p`nf($$D$-dy>scK!)uXY*J(@UB4Su$f!~8_`L#CL9SUZP9S|ayE>6{&`nosR8 z9IY5eu2r%#4IEYretFqjT^w1Nu4>J(2@RW<0+j)?@^xS)X(T)Hk5@z-)iYw&$zw?? 
za^2#Nh5SHb@@0{VH09&92h@U3QUhiMUq%P+5EwX*G#4V(_$>w6t{lw5h@|ZJG1=q7bCcALg1W zqyZKQfoKeM6V1*k!332pib@WdfaYL*Ln7#r*+qw?nD>5;T3oxQ=2F-V88J`K;Z{hdh8h`I2KAMhmAGp3T1~@BC(Y1LxX;tK&tBZh*C~{~ z11aD^WmeUNagmJo{1945{)?6ICiWg@TpZEbOcXD`#U)aaGRT&!JM#Vdhs*%d|Nf8N zvsqj;U{!}f66(c0w1O5&N~2YTsu|GL zfj+dmbBWRV#i(e$6oq6I~)@$j3j**!bERCT0*Wt9U z%`hf*bg-7AHPrgr^xNg)6;Mr0zH>=8%PFn5>4xvz_}R|$XR8eb0xlNp{?!-7@o0@k zbLz#+ruI0abPzCQGY7`tt0E8bYk|j80m-vO@kYK>Ok{|P^1d0&wfmplYfp{3{CLIu zZk>`J^-sR)OFlcF-bwE?^%5xgPb!S3q5ZKaWR&K3F_4YrtY0zNA!e$~{6^!DT{M-v z@l7h~)L45VBg5FNDzm@P#G&klls?@L3Ry7&Z6xAV2`y(@AnUZKtn$$lteN%fHdV1+%{U+*5S8#RBhm>7g32)TB@C)w>>f)dJe)m@1+7m@_ssCGnbc= zUjI1_$(RqhS7|t>PqzlVd6U-XPBEpo4ncKASW#JLjYiRkDYCUB}~ z*%fB4DHwq=P|cIuse{>?-;OZzcYZfR>O^HkSJ)^@s>;2z^B$EBl(5y7&UcY{G#>*U zh!fKU(~Cqz91tcJu3)NVri4T-95X|dwZ~f`3x>QPrYn#hP3JCb_}}Wx7&pEwTlB0c zXCjWNC~bC;Imf19y2@g&PEu%mOg0BVS50tvn`FRQ%B{VHzxo@tUXwF>OkOAv4*)DQ z|9eqppE;L8E@g0OiAa@viByh9=`-TVH?QXFYd^Twb(6EGgW!$yQ$H@DHjPNUa(PEJ z-bN%yRcqFOn~&I8oCFz4dyJ|E+X4dBI}!7YfYDZ25_`HGohB}=dCB$34mGC+EzXoO znWqgn7Sp-==djekvwI=RkeWQH@_ROby+rS1ee^Mh5{M$en+CFV1~WRqU#PWH-W-ha zatlWYVVT3d@30|9Ovr{9&dIAg6LWIRMfHpglZN*3Re3Mcm@cIQo({v!mIfBzsCj)} zfwj-Y)R9qw&GkMhgY(Z|LibBuYe15o!%9X4m>kIKebMsv3HP%@@=)RJwuU zO`K6F(cYVekp%Py;gs*b+E10SjZ|i&aEvQ{=c?O90g)+?TqNIrZzEzsdYU`m2J36j z4O8kuofd>rd4{jgc1+dOqVK01s6uWur1@bK;VvE0@n%sU^KQ*|d(t7jP(t9DR0f z9$>Hb?N^}di=ApgiUa25K|xoIL5eh&9+SqdjszTTA>|vSajS)+d`&8hgHRWlydAO+ zLcOt0y}}h8e@85DXC6%t^p)=;Z5KK9U5inYnpt{-7C9UeeQ^Y{w5`r1ahhhnc$sN{ zzEBLKsGTnSa9-c0&F%h@O!Dxn1qozX7Av(u&pj&qc>|_j z3t`xG%aTD04W%E?j=?-%FInzK?s5s7AN*7csMtmyE5SZO;Mvnb=6cc~g`(?Nigqt-AgFyW?g9S>|1rs zU-zrqn8}~#-D4OzY?7k(6TZ!b&0Fvp2_!3-2rCNn&w@;ml@140ZJuoEFI<;@nB;T3 zU9yhqE04;Zi)|PV`t7Gm3>UvUxTmTk(5MkPTz+^`N2QCwO@M4MOTVBtqE|ko4*N}u z`Bd1$wm|O_KjiCDOX)DdVV$kE^@1qlXmdeL*I-3h&6!CQl5c;WHcsV?gpq>f@dVp0BD%RZt;;ZWB$o{El!k~2d^ELODp=`%JbK;gY#$a0|aDY zC%pCvUaF7Pt5kjZb2O=GSvMAtf*E?E)}}|2rkTH9r>aQuO&?={`Vvxjc}bW5?BBe@ z6z+IGjV*4&W)|Om9(&JO4nb1_Efr=zgsi^IU!A&mixpL+iW~nHG7cR@)U7k$0ZViu z+=V}H)T`=oCURLI^7R*e1NlR-P-%Dk!tH#vw(2YNv28TrOZ(k+D*P3LA@q=$xwOD} zhzduiHF`u{qgn%HoZ3ZOhel(hR;zH0lIa~DwHh8~vBTR00p337v&XY37%D$cP{C%uWlw%Jr9eguYxEMz*3^G3V=vb+( zBK^<2tn7w7%6$>1e+Q3q0L^-bU-*4|IdaKuMEEIIu^ z*n=ac+|MG9*7%ZTBYl~DCQ1tV3+D()&UF}mrt6e$Bi_f4zcrDZmBTMpELP`qsm!>r z<>Cz-i%#9K{kM^9J++^$a?WY;Em)%WIYgslqDy^EZw0S3@> zhyW)!nE98PazuS^BC=BY>*qw+uu_4bg{G#K?<~H?suMhrgFpQ{kz&hZw5MT$=-ux< z#3kRNC>yf!kv5W<&MzE zCnvgC+&6u$Q#4-6p`1XKVK~%vqm_buvEhpY&wF#cO+SaRz-C{S>lQmBO!tvKAQ%NCUR9bee6$kBLw1|c* zAp3$&5Fl|m=-R-X*(j~lTUwo>o^(0RGzep;*eXUw-5xY0 zeZ3T$8*yIz=C+avthv5~4@%sB-OBe_ZV>3aLd)znI8x!&mOO!IW(%T zuhIPnUY3dJXVyRL#L6VT-k#U3hr8}c6b*J)&C{RMZVhhi(cCWBvy!CVeHF>~5oOxB zamZv|H4TrFBI{f8{)9?tByqm%7t%PW+K-F)i?!{~(j)w~ z%?jQ+`u;m`#~DtbGLzbEK43?LRXEPoG=&)jk63HNjmZHk2Kf`P4kC@?4WQ1+)h4(} z07o>Le~DSJ)ob{hSJIOaCUmXOuf<}~*a!g?U-eo&Ou5#JeU9JkYp*yo1?;>*d<{RP z+9gzAFPJxoN8_7{kb6$hVJxSL_tt#v zhA=ERkr-xX74347aTep^XIWSd(n49Tm#V})yPByAoMx+o#M3idbmKO}(QwOCl#TLN0qGh*ey54hlK%)q&ib~kZdXf$t62y8Qo8|~ za9G#B#k$E=VrGa%LM#}-oSE@y4jZe*p~Vgtt2n`IlAbw`<}$N14!Z7C>-D^g874=E z$HgI8GO8i1!tl|5)?{O9Z8lSlTY27{OsATuclM*qPa_VjWl2j6aS~eIv<1B8)b-YL zJA7rlG4K7hFe(QLrVMYOOqgAM7_X1-6n zOg(0u=oEo6X&wOj^FV>nP3})o(k)Gsl}swyV>bDXbVc3-lXtktJn+T?7f8Emyw+!X z{sopbJcukA7lYr-!<=pS8pmIIr1=ng_Cv0IIpCaWbz~x2Z&DMEdCqV@7*uKbVsB}0 zMce%uSKpR6)>q+n1;qy`Y`5iDKj-z1X5!M-=g7JOOabBssT;f3Y(>l1O6>@()&7^M zK{ZzaQ##n*;c1x?!+_})^T8QbqvrP^-2F?N(PChFr9TXcA~!=dS$a&{E#cE;qq4)l zJ7()Mr&Ea#c)*G9kS-j``=5&3P*j%iqGNr>cGSy0i?)pf2@rDfZLNu)C(?MU6#Oq9 z;9uP;$~3x;1nzBb4y8K({r&4-Zi!0k5P8#MvijW;2*0Rn<>61H%zO+vPhRde%0L45 
zSBSuij^jzN&rkRf`Tx0U!v9@0)9y=m-NQ^33jI?bTCMZ$+?8di2NYU^G3cCbel&it zQ6xCn)JaJ;JtDX*+We#Ty84P33k~jti2P*BxS5#t+BtfH4H^XeaR3OBcz*PX$O2f= zw*e|$AdW+zNMQpKJI!7*56kT4-YfyO=$n*ZCiVjA6kNar;6s+BMA7^G$WYUUp~Y{* zU(blpVn$jbh4y%q1JbGb$@Mgpefx0s(TthBGM~2-_E>Xl4ZrwnC=c2y$|H4~hT3J| zL+FxY+tX@y`+xnE;g6uj`R23C+hnM-(;WGm`c){E`6>YF0j9$%<-yQkB=Z8iFp+1k zSQb?M_+#{pXw%|Z-BH?NjI1R?PAoyCUCF9sqWI3s`-5NNat!a@OOvm1=8@Oe@|?ZB z-u`Bm6xk*(XepZX5{DQcH!~e!N>pga{K6d@H)H&D+#!>^mf{CvY<^j0T2tYHx{8w< zS(0sJ z7oUUF>juVb#cpf)ZpHIEU#irairnwk%v%P1U-e$U5{b-8S#KiCtI|D>o*s*MKP7#L z%f)M$5OZ(zOpJZfTjq!eDo;=t=``BUS9+lxG8|p86nUerm^JvC>sy2za%ic@a%pI$ zR8_25A|Yrl)PvlLID3u&Z!_l`>Fp1=_7=57bgL`R#E#&eVTM{qmaQ`<3uU%{gw?1G zCRmERtv=Wj~~{KO|3@UtV=iNC1B4gy6@Q$Nmx*G)dzw zas%4)TNn>c-v5caTIv?3B)6DtXU4j0aoQ{mh%)`8E8+UncY&dugK2#;BIT9Ro4o~> zgp@yV1#-yXqctgS8~`vzyd$&Z-Bf_$Z5dg3rK)wUoS^Sk(H`9^Ea+#V?*L=+5kB$p zD^uVE_7yYkTf8Zmhm*wd+D%lur(2j98k#5FrZ1zRvXOdGnQu8@ti<(Snp8f@ojo&m zX&XzyH~)B8V_5$#p(d4QyS-iVGM`&Ct*XsxfZ(4F>rE9bTiCAE$Ze_3$LAb8>vNcC ze5oV)pRC!ktJ_Q{RaI`7RjP9z?d{7@&fT&=`r+7ZbLTIBAZz3_9M8&!tN%W=H%uJ= zX)3$Ki|2Z5**6NU(imEw9VEt+pCu=ucYnDuug$TlOHjyh8YRUQnrD=F8@{iCg&#Ku zNLjS}DIKwPgB9^N5bt+5Re3Y32`xN;w}1X_m>AD>2Cs2sPlKP9&k@$7{#b3Xk zqgA1AhAomtGZ`c!qlR_(i#sl4#UK$_6vV-NRI#ek$^PU?EOyZ@UeD#__h{mO9LIYT z2?D4T|6aFmvetVSjj22u!_Y_NEZF2Vt7v6E~l6Bn0 zILULO7)9KO0m=>T!75MGecO4dJ_ZPd_Ydu^q#@yIn=ct#1_n%%fSw2_Y;0M7um6Ls*lBh!L zo{{b)nz%~tm2j(aBm??tiUtkfpt0%TPajFP7ydH0k7PjyDf^-%eu&4;6YsJB6a{}O zCaF?av;xl4GBwOddMgZqH_WP2Q>n$QF{HRpYBS z%a8lcc-_9c&$khv6aPf3HOMk1H(_pg(Q%QKmIQWa|P0{bD@ZzIRUE*oEa6m?wW# z@B0R2ls6CH&J6l|(qh@Zrw6GU)J=v0hW_SNbS@GT9EoB0tnu)gG{?wj0HzdQ@MIJW ziXC3_{`nmr^A)q?bJP*1;O|2E_{9(?SMYA0m`C1Z?I&y~cxkcoB;F;9>_f}|dudMR zBWlRJ53*PmV`5{w%X+r<3l>xMZTiI1HPpC}e~qu%*x2r_-Y5Qxbc3f#dThtBAj`D! z9m2!I>uS(BPs)o{ntzF}iT}qb`+V|m%KDoq;=!!ToC`^zLq;_U%9_-20GQ z+*JLj=4OZx729bBdI2ym zihB`ZUn*i+sr~-;9c-oiO2z{Rp8He?!b^lBa}!-xP0nWT9I>p~oi>N{Y|_d%b2`)! 
zLqeeG)32%)X@qjWc;)_ZX_u5^C73?9T{C$(K#U9KJVHed6IZg^Y~Iw8YnA#^@~e_h zfZ2}jOBN+m+Tq+%agv$pV6Ccp$9rZ|zZ0)`QvMSWjZKbWhmO>(3ig!2`mUT`_tU$r z_vLSzC)RG>uJN#%o1c5Ca&dZPrP&}{12``WrZ)HVhS|Ty@DP$_OG0?n*)tM6 z6Nbl;UAGn9J?mAM_(VJNvNqMx9DTjewEnf<0QX0OKv(G_)+u*%AAZToT=WGt!wMhm zp$6e3%lb||2PK&8bODWWV2es;_72wYvtC}L_5qwGN*1a;O^~X-ll>De&HK_W6IOY_ zuc7Z{)p?-&LnwvMy{HVdE?!XIHH7qKU=$hGfS+*R)ST;+Nc$_H>}JgYjGT$1zQOJ9 z`HnGHv~u`U7-4Vc#8nwGdsW2M$utkTObUXClia5k&_A8ExQS`8@c@y>zkDT5v=qT= zLI3jTE`Gi;{q}))-@E^){Y;Bi!n=m$XURLg6o3|MuD%sw&W zD`J0-o$=4KFK zuro_keA*5dG*u5Q{Y!^ei>)LdQ`@LJR;-J!oUsKlGkgU~c3z%8z&poAlg(z(ODC;d zO*7M~RYJc@b(DnDkX>&Wfa!@HT0xFXB@~t#91{Qg_I8wz@xt=VA`J&NE*|cOfRV z5@51hIT-%;XG?htf&uj5h%2sz3rf4 zpLR-T?(=+#xTcy><`*gwlpqN-pp$)3bOiae;Qho4(at7_2)pglgJJN^L70(>>;~(5 zKa<(4;qVU(CNAtOJ&L_4+kn%#wN$1(NZ2XzMi{dB@VO3^NXR}@ZXcPx`0Mosq=tv2 zD;w56sfNQ-IZ~44lz0kM9~a@_$#-|A5yT|Ntrc+?2Q7;(8q%ar8>;BW9|D#$Q6G8# z;}>1Gzb#I+jh8aqO+iFS_7zp%=D0;DFPabm(I>59*o23sWhHR^7^DqDOil*+#WiPB z#D>;sXxmv{^@QOvr`z6tBkZleqW;45@1eV;Q(BM^q*Fn^jO2wOLkf4k~@o*Fx%$h#mPHYvpuIv|oJ&$}(Z^Q%#S z!P}mLZ_~)OyDEuWvqYo1L`18?G4EgyF&)3e4F8y#?Q|t7$+22*J5Fi18s8#+etU5` zmc2{gI5te3)fKnCjJj~7c5AucAPG9?uFu&%w(i?H7q+c*Q7wJ-9%H*Gv%cN=LHkej z%!R1+;lrFJd1N)EQlx;*MoHUJl6~;=|nLw1{e{?%-?{i!Ygx_Cj$`(bf?SOrX&kca_zm0&Ie%EEc!S zzCD)J&rrA>E9kzM`>jm$joXR?q>>9`9$a+Mk8 zjY#{h)=c=h@`Sm(eo{2Hb;)uKipzu}EGW^`Bo3FFkp*ijjC{!-%Cbjo5@p^Th@lB1 z-%VSks5U*dl4u!TD@5gsA#Gyalp`ea> z?JdImb4H>Xs6_RbgDway&@2@PB(wg-y5=GqXqiLsv^wae*o}XhJOialBjxSer771`^X-ioogI}DB zGe?due$r|xt^hUX7Ps_dA;|_JCB>0}S{c>$`3@UlLKSM;Kso8bib-I!Pz7*=apaIf zQE3YQl+ULj+m6rZIYAJawu<#brca(dt8|PT6~lBJ!(Nl0Qme;bTrvC4O^v}3EFm-k zI3HZmx68tSGm&gXk7g99v}wnQD1c#HA2Z7Ceh)YbQLHpxie=`?>wWK$Ww-F0>=<@ftZ z%=p!2d;nc&B$MMJT5dTd3|__}Da%IYx0kun$kp{!V`BTH?!&Zi*{#8rq<~?C?)d2Z zCBa+_ELo8BZKgb4AcwDGa9Mftswmf4WXwtZ6jv zIYV+UvTi@6pXU2NXw9vx1$tFSaljx^jaj{WjtnD0?`ATOqOkp`&nlH<0?yhl7H`)U zzmfm6>eyp_QXHF|)U+$wg#y2d;rVGu@2!2)NZ$n$z4jZ&@Ll#misFXvT0Q-lRy-El z&e0i04h0i3k2Tre9_Q*fXGPla6ZIM*R=Am#Ju9ji2N=*!`6X7!B$&Ix>1t@~8Yr&2 zj1=z1heb*;^-0;RPKuTXHLtzXzmsF1#z3uj7~scjiv>u?j4Q19th&RaTZ_t?6nO{r1*g?DLv>Gn9&~ z_STfDw+_xc%Q*8otiO~(e~pTh?gT3px$hRobH!CC?5Q~zmwXE4&}^U4q%s?mPG-8b z#>|(JoZTN!_C(D;mh%<<1>U)Un`Fb7w2 zm-GL%hhaX~^tNKTavo{v2?^@{Xs`?3R)5y@z@k{yz`rhQ5F-$eSV~}>RJS6@|^*$f2!F@Rs#|FJjQ>{t2);zA2g> z+$OwJG+i}~RwRcz6~ZI?32L;EE;8&;v8$`s+jg24tS`nWlvdvU-S~-=F*OOz*C#3c z=2CsWbiQ206R+Tuo}K`)d4CE%F^|pdC}i|YfW72j$OHT43H@1 zuVl8FuWfa+{2t6o`lS780xW7}4L6xKuC7e!NIi^;i2@NeyGOA7mNAf6T=mnUNIJ)~ zn7W7TnO%iyIT}Kh2LlzwnxMKkO!XyIMo9=_#lTnUh%`Q~fyel3&T93ZEDqaWD-k4< z5Tr*?hQ+O$8V?ji45<#(BAba~T60%o=u;EpD-=n_Ti#D{>dWyD_&<0bPY!Qk7u!{J zYp>Dpz=F9FV;dY}!D&=?!{^wy7;t4Cnd_C~p*q>o*<;vT^J^|;1=nWwO(GrcM-|0t zI5W>{b?@Y&*b1Wb^Z5()#)01-adPsZJz&G*lVj@_IYB1&9n#3I7vSX0?Gt#efqJoV zq!m~_Wo4Uj)WXHxj9a2p<-AkTVRZK@^TA+NM1VcI@9T zrcU0@3{|5*SZ5e@v`4ffyI z|M(1oc7#%7xGk99N29YBKcO2e=+I3}2@^c281}||7_IeX*r~2at@RbEWUeTjoj+>* z9CH(Ad=0y!>cM>AgMhHc1wQeVxy!}1hnw69PAV8o0Hh4O+v|N8rK7>`Ph!S$U~A(oCooPu zb}Gqx_BY&n0^We8Ut~;x}vdUO%qZ>Vo|7CYO z_wZkDrMN-Mv=HvQR7?B`J=|i7TDL>zto8OLAm7%6yotD0z?1ihv@r53Qj|g|NDB# z`2Jnb>M?;7G39w99bx7O+XlVp}vK;jpPJOR~}K-5K4*7}fbw zm7KDmCo3C6*x<#*J${8TQ&me7L$}7oz0JY`cMtiQ=gfjq^3|Dz9gg+RNXqheqdnCi zGanyYD^n^u}w4$ok)p9s^XKuL-5EAYnOx|Gj18+b}?vzTNesI8@?_vK3{ z%+f;F+q?B20+^D_4Qn$~#CHJ(;U5%g>Q5dX9cEQ2wQjjw)W9@<$+(0wky=#C0tH(G6mX{`Dhbmr!wdrK zkfn?1*a=2$mPTLC68q3{m2xT?5yE>8RFY3oD*T3$K0R|+3GsVaXKaa9RL+dAK!742 zvS@iEt0_fv&GehJSHAzy^gR3MOQlv41~qImU#co_tJKCBfS*oCPTtQ>&JdvDYh`>I z-0^N$0L!VA=5R$!(=nBwP9@la2Hia1Xv@t8F^^XmW_`RSYhycdp1%EFA$xGakcRml 
zF=lt^*pWAD=;584cdENCvb9!t&J=9_RTbZ1Xe?sfL>4MdSZIulUWSRRZQL~+R_(F{np=Jc;yiO?DgWb< z5w9jLxogrd;`gSHm%e{Uzuw`-4ek z?n%er=;j0}&$e^zzdQ#O6>H4ABIjDS7TIXJQ(#-76g4`PS&2DjwHJXFTiD%=mD-aF zGLMFtFmp)e<=112X{b*>Ji74goqxPf8A-3eeLW;s#?E!p;Wl2?b@wvMJFHfbpJe*5Aj=%$t*Z6 zA3w0#$UYyP1c9LqUFTxbledi}&@vAVrSM<1dg`62PrlrGcXFrmiRle+)ig zWVC-Gp~mw_;Kgg5U6oo3^?$9=4E*?~5SY~SW21}2cbK+w+PnnUSUBg+j^f5&s)S?y z6twg=uyhL4v3&Ni5z2wb1MPzr2w^?ZgwkJIio&L(n>gx8ZYNiq>mY_nH9o4u;(FAf=OnH1hw$%j zPO&khNe%^J76M){-(*$++IIoohJl7Tb5jC5Hb!z}R$Pa_>#M(sc+4r`H_I$rLx=ZU zoGl1qpj?8`V#AHc6YeK*{FpiTZa8N>jO8X<+0d$c`7ymu3hUQ0*)f(ivYoROUwh8E zn-_JD1>LS3crPmJ_*V(c1l=oFv_AhlTQam6RG!yyLj~f+lQ6_jy6yipF!tKW!r02R z$a>4OE2Sl?ez9}mcDH1mDc5#>hW6u|R(*C&#t^VNX>c~jUmwV^k#_YgviAjCHTe?o zC;H02z3kNHqFO1xnxjOx)#ulvf*A6U6jEBuJ)a>vKb_f>;N$M)6xcfB1vmlp&*Jg) zclt@Q2E*d>+L!)mZ5XkCJ+mdhjn<$+D| zgWQzdue>Z2E4`d8UEb$vgA6WFAth&gCjO6v@vOBr>9#CF4jDPQ^^$j(xu8BIKDWq%S- z?Ol|w%GJ^4ikHK4CrK1%X;^0_ujjOG6D-r3og}Sin-!A6bCIu`1{P`oi+~KI7 z;vcCL^u6ut-*VUEskeQPcjJGuTON-mIxHkTke?*|7nQd4PQN4#&WvkY@IN)OKQ$h; z)<^5a5x%1do%^Nl_C7{5 z^?jA|hd)xW{T{~W3U-Tvedx~KWp7T!L{q=LjzA#pN~e>Hw-(!t)#o*RNh~iXpsO4Y zt?Wri73)SEk53oSwVc2~Imb|-u>vQO9$&FA=1qNgEX#yWm9O+4y0a|p%||52=|dY? z)XLQ})a?pLZVxZPrD^QKu&3raJ0RzKoXo}AyS82BWeExa2}tcN0TzbGxmEdAENugU z)LA?zA`L%xn66Yl5DER6MOo{MaBh9)fJLv1@g{BK#WP#6i(~O`o%}#;fdpT+)2ectq1yQbgT#ZG|XbC>{U$I;X%+!|LUC4M)GOiN%PMZNu9H3)|EbchpsX;H6_ES>eIRBJ6;{kNI5-5*)118S9UIDR$Vdj0yD_CNJ# z9|QjVbM6-pSV{Bv7!5}wW~W?@hiN1VJa8`tWJystHucVE%YW*Yg4WXwZE@CftYB{{ z7&Ynhw(CQ^2ia8k8#D8pfcn_V|D&=`0KX`f$uQsbiOk#ZIQCfWnzs*uV0pZr#L2w(=tEN%A0@kmk9$&b+7Qi%`P) zP8-Z}TP*nCA!_iuygq4G$tYeIEYyyW}`* zf>n1x`vMCsUdHGTb?WK4_vVw8v6yQL*}ib~6;S|D!|P)+{%E)SF~Y`p^JS6>D+6k? z%%x!Jbinay%Ru-6$)bdH$0JxxNL-kX4r*(u+@zx_9O{I_ixH**rs*F zap#=N7pQ!vS4^JKZ4>NyDSka+eeip2&4U!&svVcI<&?N4pIkTetEy7~laE=l;2~#a z0-hMv>d6`dsa~YFPmy=Tov^&Kwdze-GePem->c?VRu>6DNBAS&h!eCHdrbqy`Pn?r zV>=M|3y9dX5ec1-@TlpWACMw zm^6p6bHr*TGhCg5FiQc=y0g)ueYsWgnyn>Z)6B#rLiAwHX!9WquT6lM7tyeb^Y*~N zBmZ0JvnDx<{q-FS>n5~TQ1<`b9+i)Y<}Uq@rW^CN?y?9X@t7sh(~)fN|} zwc^JSRu&f5kX4wmc4nKyKr|_@{SqfRmw80gp66CC5_C67*?uBZpnvXUU&D>|dLWi^ zv}0|AYeBs2;r74#A|EaPeg0|MVAj9-jr{)~3xFzvZ})(B^`~O~H`OFXdN`I1>vN+Kz`Lr`wSHZ!BiJBns?zu({_ zivqPVvPCpAEKxbkD#6~hs#s!a=)kadX>VO|CyACm9bs)P zotj*7@C*;`Z5XyasP#J2B<%V~;M~n{DXjj?Ydzj-#*Jr-_|?sdf2fu9rJ5F#dY$!zJIo7o|iM%#vvNi{_CMzJ5tbzrr=w{C3JQQP&@kTpXVa5V{+CR(DC;LiFYpj zN&{iwo05Jr4}{>f`}QDU3co4$vB|m_6kqWBK|{$wvK`_xQ{_AevGVIRq8hXCJ3+|jQUQ1O%da^~%HCy`(9PE8;475}Y)KutlCq4UZX)FbgHu5dHa!7{ zK-1U{ZZZBc_bQ-_k2YRuuV=uia{uQkG|!ao`ye2enIU*orEWModCfdqsKZ zN@!hP_dV~tC=q*p7Z%g;cvSHtD1GZK=#LG7R;3P}aahJ`n};2Vkf%MH%Kr1d2R9f* z>9vY?*7f@0JpF|@6oaFKvwlY^hK%sG8K)2So2X&R+8m6Wf|?iM-NK4L-k3j9zo*=Z zd0?}n`OmW%GAoX}s?_rCVH;5{mZ6W~@cqnm!H=O1IvDd871lM0iKXfSU!~5AyKA9K zZCcN5F@rW8GKb||g4p%z#0-=~ODZ_3BVuKO^DZ-Oxza z-wUq8iGJ-dCB=YLg{d~!o&swLrMjDN;4;&Ep8OM3r%R;!4q6E9r(|ujz}X=3rb z7}4^f5A+2)@5TAj(K=DtmfxV%i;MRp=sK?x`BsR~GgUGn8X(qa;fcDEnth|x>fK$u z3;8j$KmvwL^`ZNK zgkBE+#1n4FcEI>Je8kD(s;nEYm!;E_mm~|IrZmTg73ufNjZ> zj^eUmeTAG+v{T5Prf^r_OBkpGX*E+U%8g>r^ko6qCH8)RSIk7A9S-gfA6Ry(63dJj zFvVzQ#MX_h%bYPCitPdJWkkYnGSepKJCCDTbf&m~{{SCxY#m5iN&Ttd$H8akMS(-f zh-t^@;k(8H)Tik%OkxYXc&<0uZpY8?vteS%p>Ap$$ax|T-)eGW-p^9sEF%fc~{Sg}Obdx4r$5b`GHM7dj5>nD1!og-LN}EA=`EgUsGGYl9^pi9v)ga*` z#P-zhb6wgJz3TZ-JIt{?+VG#&i2t}Px=1gC^O`a*DJ~V;Nlpde3TkMvBQ5)25S_<# z=jW#Iy5c`D3we}MFWNpH*5UN1FK%-6j*Q(EBITM4V!YC`IdxaQtR%WGST zz0)hw>W?BB7qJC^6s&g#eihtsP04n5Ukn{UdVnqj-|V}&)LW|vgHL>Jkwp(`BEe#; zx{%{|GmxiF!9fF_TNfdDhD=w5)@hvm(5!iDopCEz4{>oVWAUmsBuBtk+uJe(F7&NyF zud4Ri^Mp^No!5jNm79VgLSOuHc92?bu)?9-3(mwrhJ0>;)>+OtIK2J?@m0pnrc*_4 
zv^FHf&aoPkIIxxWX27z&R&r8XJ=)Q+d9Ry`jeERpSPDz^O5*cEM>OuCvX0! zaLgwWl?~Al{6)oIEO3q}OUB+N@bqqvH-{f4S5B52`eusc`xN=s-Z-{wA;Xknu(5NS z2Tas~HKa)-A@kUklt=sgI`UdFHr@maG#53mk2PB`>ro5-*Qe{g5Htc>0wp5-awg|y zzlW=fegfqXa897;-L+RB#H(^`Kd5Uw-yIEgELY#9{_5?22ROvJe*vyr_Mzj895z*! z>^?0F5sEQ!Px-+<(f@dCx@TvUsYSohmM}|J)WW}mLGU558%#>(TYPFp8Rx%bXEZXHu>}t5K-xnPw9w{2Eb-#@U5=-eOAkP~!acNM*5g9e+wn z`~)GhehtpmeFbubL}RIsedKeYHq9-H_lFA6XMlrOqFRZtHNaA5F0$c3)d4O{6RT}OwDD+La7Y;gJHUT?~i z$7dBeK6l-{7=rXiJ#~bhzut8jSc}*skL9)w2zT}0WjE7xG<{D_@`{@|M+^X_Xb+jN z@b`H98g+e-vZU7Be&{G_dRLr~Q%NpJ8i@9mA-x$7pa)MYIFJ=R#JRmQnEbJ%EVg^Z z7l6mbiL{2ua~T)jIrfQ%m)ZI4!2((MH+>QzML?x+bEL3-6sb01M0{to8x{%3bKgWH z$Q;KtElWa1{8RegD8Sjd`dmDa2yPQ0y39x0G+R0mR7rV?vCuK8rxQY1h1|c=@*TG@ z$R;a&QshA0*^~TgfkvC$zFkaQv^0M!nM0Fz zeIw{Ul*lmu#;)uW30pi?_pjYY`iE0#{b}B2_>f9~j1P`TjUm=%n7^l0j`b=c4I*fD z?4>@v6?c_FMn`&2V-P-nRO=+>O3Poz#1p$88~VZWTGf{I?yZ{#B?N;8gMZY-4QtUk z5Fy3MuGbEm5yjMMW!_I4AiDPI%u`wXyA}mxhjb9|sEgF?0Jydb;OwD-Q(VmPq-9

li4?Q0)Up3PWdejJPGD5(1G>T@-k3cGCe zu7_pQ@t9BZ;R+G_^b!4w+Vz#6gIyfoj>S^=d`}eo~aa~LW^_vC#hT6rNmZba0+YIc+U_gaF|BS{$%;y@acHnUire{d- zqH9Ni*FeJP)NPrg46`v`hz228xiKHYbfliDhrw+P-{FfP1;A-{VDHosP!um${Eq7& zg^L!NxQjjGAt(CLyAc`G09%+DNtmo2x~Bsx&F3WHuSTi9mxU#NU$J@Hl#GEb4EuN{ z$;+>tLbIY<>$aLP|6?$$Fu%8E?9+LjzZW!!dRRx%w^{6Xh76{H&tJ!t@v)k1*e~*; z1j!ng;svHpT7lFpvOO0y`53#2ax=rY!#eOe;Usr94x}hd`r`3qP1kotzOIk)zEkXi zdE50-mZenWe$8rjSSHn;4BM){>C9=z1kE1RoLbm=Jyhw)gbdvn2UQPymjxmI#cD4E=5*~L&{0X61pr9 z_>nc}U;}aUAW(jJa?Luu&>#QNDEZP|YU-1WCdila8}NJ42wNn#=T=QiVfzmtL8Kg+ zmr1#cAa}j{+;$}YL;usNMyfZ5S=M<76G;JeasoU`&gm`}B5dEpAPm+5!@F%uS>pr4P{~jXWg$fzV`Iwa3Vxr8CrBo8ro4J?s-787m&$$F~)4`uGRYR+C)g5l0VGJn-J8!$91SP zBm?4?<{ln(O1$ZZettKWn#*9KM+XU^@n(*nj<_R`4S~Nsy))E=iTYikY8!7QsCM#z z)D8pF;qy$y!YiIbE7f9TeQbG%PnEHBlTG{y+!fskygl5{u5uDEm{0&;XMjur;Jy3eyv;5zA-P>b-visSOBUN>^ z7)47pTe*PylUAQF>f#R?oy-okq_Yo@k^8U?bDdWTVa9ISO4xmy|1q>PeS zY3`UZyVly-^~f_$&3wKDbb1pErujeEo#*I5Qnp>%Jssj7!#{k(p2Yg9TE+2Pfbe~& zFT9GLRi0g*wUWLErv3*GD8K6j4SiA<@uCx%sn~2!Fr>Kcak{UB?)k* z`K4Lh(<06*>66R=k2yi|LLZegHk+d9+(lgdRR;P%)yPKCPNPUB8ChJTQP#Nsk%UsZ_(U%MKLiUlkgjrwu4tf_-pvNEV_*M< zWUr^Io1qor4EdDo97D{FrYQ9EYvQ1oh?zHz671sQQjmWU7w~FIb!p%8c0U98sWb}( zliDM%L>uonS6QZb9f~Fq>)#77W$-G^GdD6-5fZZDSS^&;O`_jaSwnpj3}g9oJ)YwqJ@n5N*KR8>!H|MpefK`r8u%yFhw@40uK^LS5=V{YS;>cBNI*n?}A!G zH*{GEYcZfSLe^&_>y=@Wqw}VP2o&vGT5d$yG-@iG6CFFihi9z}x>wI{E#SV>fJx*< zB{GgrtzBgGRRJ}Xr71e-^o@B}iwJ?*JEM!Q zTi)m5ttgI{)=-lgrD?Wd@8)%cmn5s9)=K zmb(6neIakb%}r0J>cvP#_f)-&4`4B?wZ96)fwCi^-fl8^;en4Lo3(^jw&ya{id`L0 z<(%f!-lS`uyyf-iP-+h_*12EK^!%g-Z20nV3JoNc{T7VKam8OKa8oCQNvBk*BOgNs z(gvyum!J`9o1h%ttBLBq7Mj|GkKyUb9xQ+N*M*vSKzvt^a=r@_u|}s{KylMQu zxTD#+)C$+?&{K;^4m2`yV)dKw#${N&_u5^>fmy;&#)9IJfsJEr)k!6MK#FNGTU&U7 zoUfK+ckOy{ijGV!uzVAh@H|^k+E%3S#_PuH{Y}#0w)?(`6z@|r0sMCi*lSRtHZ@`^ zF5jWr{Qly(CI01dNI0jpdV(KtFq6VO5BO@VlWD}R&WOcjzkQ(ko)WEjVH(S@7>#T- z-(oN=#v8hZDVk5{S0GpzZ7jhAGCyG@`&g9=_$HRA-Y+ysS;rvyg(NsIg~w((4d}`G zy!k3Mq!%1iO#jzUekh2AB4~@)Q`MDH-ypp=;#*i6hXR} z`K`7MkZB5}f1XrAXfe3OjSyO_W2Znn{4rDcR!D)RVb^B=6=7c{TCt>V*~GR9IloY# z*A@14FIKmn5LQMq$b$Qx7&g8q=(_p4Tj{^Yb*LF#x3M$cCu3eeFu@7=OMpY*9rI(v ziOw($z8uwgO6R)4&z8b-Uk^DIOa{z`z-PvPANThB#5$q_T=^`reJO`o-v|<$=g6n< zg~<`~d@d&u{V1YQJYMJzlEV#uww;KIE*5VSJWmy(+e8-BAP1*{pAhJLp# z122#b<>^Ob5>qBUfVT=*1{nQBf7X@@oSwN+v2;%ht;xNV`X?vppS4b|96xSU%9hO( ziiHNM*ENQ8SDDC}=-w%*+AbrIGb$37Rc{8JHv=D*q?=}7(f0xCmvqGV@TC(Z*VYCE z4?lzsLPYXA%fTp$eAaRXX(L*54HoG$1s&%TT5^K4>=$yuX=?+%?TObFU%QucelnU- zE{ylWUWiDyBRBoA`=cr{K-6qY;zfoTSzGuux7tX<_|HLkXA)@+k9K ztI-Lf&zTPiTyGMf>rJ**`M&dmKLNRpFGR|R*Vp1C6ViG(c>#XCenZ3?B@4eho%QCo z?3B_vO8egK76%Ly`@6cqnQ*IWQc9y!;PTNj$3>-#Ij#+wu=&9&2;E~ z3Pxzj2VFOSR@>cdv|1gF^#od-55}Y(Hz8M}Y?VdIdHjR=|5MF{hhXS|_okKi*aF2z{nhB%LPyVkE5|0c zbs8-($(5=ydLYhk-cTQ2Sfq+M`D9&)BD?2$<`tV`P_cakh5<&z0s1C}BpE;SmW5cF z%>^%=W#meqVQ45H?`7vM5je#EMA}zHCa$4BQsWlL`HedZjeWF<3s$sby!U%vOT`Lw zi~gVxF8m-5LSPl(dL3Hi9UL|uVkoH3T%LzQ&FPQwceaDJpH(VlIEN8mY~M_b*%$iC zlq^d*FyX7=1hQY~uROZZ(2+5KNoFA59#bD%x}t3U8O@mAaur<_kA~~nBDVqnWH1r~ zQ1=^Q_|g5lYh2>SQ9vYljD7U)DdpmXCT$+Dcs?x{6;F?X=^q?r?h(tAvE#9eiZmMpRzQrDUQryS-~p^H*;=B>~*ErYw3!H zZ+El-EnF$`-=*$0J=e?!a&LASZc_whLoNc%m)*uAe?sz8Yd;?MZsR$xGwJ#;-Sa6Fu6 zn)G88W7aSf2jBo6%x)?YT~sxtW!Q~vL>@~|k$)g*sOcKVHzf6_kZ!StK2CtkLV#N3 zaX>fR8hp4qLxzVnQSTd`#xps<(?)@Z$!{4WsvMF$lH<_h`GtfUUEVElYYS?`L)4KZ zgzm|5NP47oUb%@L$Z2S^+g%lYbFf(TQ0Ec+-2S3fHZbWMAS|T`YA?;ZS>{y8`MC=6zZ4TjRjPt^P~SUWB#c zx(Pj?f|nXSAEVAYexGB;ELaf0jD!s}c@VGWZKe{Ku-ep@1#77nwt-h9An{9Pucwv0(l82LCNEP(}e z%fz2l+qH*FyK-||_HmP*wZ<-r%HCYs&UEtrOLt-w8a-Jr^>YogO6AQnw`I|K0`oiJ 
zk78efAVtK98h9!4I~jwO+FiL;P{bK)T6;##B#H9j&Kk7z>)fRxpo}h<_&6DEvWe%N&4OyR9NQ0suN8k@i8BTT4tg%u|mQyO71QD~> zYj~pZMbdoUJXE+?cp-E2$8#7mw?@HCX3&in=Dw2V*)r}I(F03WDdfe-ae{RVKSzej z!p(rEnuC7W(*6ktv)JzC++fFGD4$#51HenpL`1nv=2b|~Vdyf2D-rt;N52E=`9?^0 z5s^SbA-PlcDmr;sF4ux_R`P8!hQ=me81G^T1FSa5JbF-#25KLa=*a8SwoCtZ zjv!`6o`&sgmm0~(ixI6DIZ@xO`e7JmfDmSR!IEkn_gS$mj`XUsd~4GCcTm5o@zjbc z5Evi|ymwWY$h4lSwt5^@!{e?@AIv8n4}saLr;c(rpx$2nwlGW*4#$b`fHH~pu=xiY zB#L|N@J7wjdEFO5@Lk97yW!s_nr+i5k_eh-PiXHm%gv@DOhe(+72IgzX+~H z4O)X={(Pp7qtgF*h>edh86u}4yVMR{l2b+5+MKqct{x` z%p2-i$pZQXKK2taSZN_3_W2RI>83ek5Nf@kD9F4jOdoXVU*o`65*xDbyVunz6=^n4 zPPGx*aAb*TI^q-W^|>vKufO~4h_=6t`$p^B4nZrTi7#3=Rvx?LFsuiIi6UONfhG#Y z+hY<5Ha$0+dxJ{%+~+jC-#xFBWW@LJF@QPjgcaN0J^ul38rv?=C+P4|!{adNw5^48 zdwa|+zG`ByA#J-ultK9U)HzQ`JU4O|3nL(96cmEOW=8&#QWg*mu?^JKnz^U(y3jm) z>FV!!9q$>~unaBs$lZVHe*jFZ^#la?qA3I2%K*Bf(O&j0^)Y0EeC20U%W9RIuJiKy ztz9=TY-xJ)r?RbY7al!P#;kr9$6H-KtZ>CZuwYFl=q~O5I%~%@iO96RX^p=P#LAL+ z`h{~ejDvdt3R1Y zz4GOVVkcLi|M|+^l1VcH83wVEx`!I{(d|M04lg_j{Ay{5@J(zIckdVI=h+Gzr9e+4 zT{5LT*~3G%`mF~d^CF4ikT;c=BwUAnmF)X>DA|7q*lREC3t*pEmshrrbOwA}+cl(u)g>$fC+smS*ft13e~fgX(`;L- z#t{=eagB=F6t;-7DL2Zoo-1&X3ajIh1Sw3_u+qZpVJI1!l=&BQTQ*d=7%ZxXgw~X{ z(@>QJ&@jAUjR`#ReF?-pJ0)}J@NXz9n|}yQ1=(f-Q29kHH__^}QoS_>Qc=6g@U?{2 z;WiF0?REN}eWI5+4HMqX7y)bBP-@?P5Ew+kmS$(#-Q+n z-nbx5A$2(=V3^iplT@O*gL>^sWz}csxcw%r3L6$qX*;`B*PWjt1p_ZW}_O!l>yJ{p{!i+s`!- zXEaT;ekO~wAoZgoc36kE3MQ?-s>F%#wS*$5PS#@>Ju0$fha2~m=b1!MawGn;*s9i) z*#BAINI4KMp1ALTIzq{vjLZNFKMtQUnQBqGaEGz33MFaxa!R2H$4&j`OqKy~FY;x7 zF18$9y;mw)!>Dss@s7CVl4v~gJ?AlAr{^0z~G?v{#ZbpLjC0Sygt znPT;bz_E$TV&P@#%Dl|6(Sy=KxYTd6%c#I|8L6fNLkcwzELm#F6jpcg(L4hQs!b_5 zr;)9!vz!<6IpvS0%=r9q3c)QO_(9V0nLG^SE@eV(O;B)CG-(v?C64nNq0o~51~H7p zuJ(=(*52tbog$Z*0cgem5qQ|fEwa&h6}C|eW7zYaQYpnl#5~t#bYY^_{dj}b*N0bo zxyLBy_*#2O}z*3VeB2w;DLt`U5 zaN`Xy3}p!&$OC`1e_kK?$>T$nd}ekhmo4p^qNC*h!PjijX**mnoaQYT@T`m(2XY)Kj_M?Ws+83E6u3yTr$nBlnjxK0)GObkxU3>|4 z5TBq~)+5#J`8xOOY0tZj<;>q7dX*p-(VJIb$4APH432%OXsefRJt5i66|f(WZ4gxN z{QV&iFm1c~J+$8S_l{Yre##k9P5P_pT%oiw@UZUgWD!}=`xYViGdfMQ+hm{jYH_-z zv)psee_M(}=4#phlxqF*LI3<0TJ%mKsY4L-K$uu))b5m4q&N~ zcdp6@r4q1m|A{zw_`xA9-o~Jt+|OIhzHXC$ies-9u5u1$g25`x)ztT0T8swfr?Bw(GGbviTdPWRD% zV&*l{M&+ha=D=c|QvY9!y#-TTZ}hF%jRc3L2^KtP2u^SU1b5e9!3lxJf(3`hHH}Mf zf?IGWIKkZ-hsND~_}!VQx^-**Rr3kXIaR0L{qDWjde*J?D~X0Gm|+l#@w+Mh=Q>7E8R&_3V(ZRgJSiO_1 z36qt@A#JE6x1Gky(0?GMVH(k<>~xNh2`j(!Z>pb$pd18|EtJ&o>E~LtK0RLvJvmqz zaVCi~tj5xlHz1zi1LxKCrZO`?V&Q!~J^!(O*L;PT^o zm6HkH<$Y|zh0s^CzpIp+>P^R2={vbbdK}7Iv*NGK6oC585+qOHEzOT&W8m?6?k!zl zKYxmYH3FuuurtmEic_hSH{uxcxyM+2B7+hVtd2;KPLrW7=lZpVUicjm7i1pz>Pi3Gc1R!e}L83B}+kLJGcI+vzsh-)PJYTtu<_=0KU>0CeTDjiD*twFl(o9U};q8UXX~N zNdBz=RUx|P27RazB6|^?Vu4J3@{|dSaM;qeGM<+tq;Y5eG;Iaf(buKwvRYVKk^a=;5At%8h{tQFwH?ab@)#avsJi0ZVac$mkZETT0@z#7KH>Z}L#*+kHNX63kOWrIaG|^eGx=?o7JBk0Dm; zIJd(<%KPZq!<8f;wI1waVy!ml8@i$SSq1UNf|{zFx@Ajsr0laC{SH%nZ!OI{HmW|x zSqJ&&x;PO#U!K`zLZ-ZO=%%fNF?R)xRUG<%U3S=8eFuqL{`yav2f7*ktZQ&4!r9W88T7 z$Ud8^qix+Gcf@Dv1d{iz6URC^;yl8ZSmFzH(3YJ+-JP6R!onbI0l{DCVOafrS0yX) zSTWo)i8;?gzIvA1U7{0I#-?o$N%vWXSdYh)YXkDL=I0G}_ow#loR%TX*=%v0{8^ae z=v1l&b}?rxsQ?-ov?Q5}pcolD&oPA79^4;~09D{PVPMF2W|lqCR0Kyb&v5 z!;YKw*rcc1i^;7GB#dp*KGE&{QBrOk{QHhy&^+_;-tEYx=-gwmtCpxJdpdhKE6Yq{ zYF&cgF9~0@@dl!R+1T!wKVln6Eb8Mi7^xuXO1}-l_=CRwz?5fFGam2XSWrh#1aB9U z;BOD_#Sims6)|-8^vA@wtPiWFqVW#rnTA-T6s7*&xK^4g2R zBew%BzS-nKvnmgh=gEs23GJAUF>wygj-h)z=if$pt*k1?et&>o6UL7HN@@1&b%c2; z-i59Sw`bvqm}mE+AK|3dO3?e@F`E!&^Uu;X9&7$Ox_wZji(bhudt58KS@v?wk9h&(ODCt%1P^I`Vmn&L zI@6wT4wpSmaoN!ro9d^>`;e6f1mDjg`3wTsHs;FXkve+)5O?6UC&qXZO&8rko$Wk0 zkHFVfB@o6U zU{F3slIdM2UX~8FYL}|NT|W(-HR9jd=0nB|mr)DhCFIv{R 
zHz0;t9;N-GFCARV@f@fa2@ri~%fKce)cubp21iMRrCg84djH zTCg1(13Wmkq{l%%jQNB|7CV7YOZ7Vs1D+}s2A?>7n-r<<^-6SzwqMF4HP>fYS2tWI zKr|paU_8Jutn5)4w$7^k(b)P=3=`0a3>~2vP4`eNr-~<}6dOX6DtPuK2+T;;tJ2>yl4Au>jq zTrJ||-cERbbeSm|?_`T{Q1N*jWP#WMPbE6&Gq1fG__CbNSY)N)XJuyxsc^&Sqvy2AL&}JA);hH*Tyj+!sP-0@c}p_N{td@N?)|zTK})!K}n|~{Vw=! z(eh0GYsDuJ`DLYCO-S@#hTWrfE%rh>HLYxKa`tw6g-4i1PZ%9ARtKrTN+LzN*f#ngzm4`}|5)G{Q@lk#rVQ752^k4 zB{&GFzq~_s+XlwAP@qFOBKN)jglgw7sL>JruBQ6SJ<4l&?j=0q5i_6^Rp*qvfiH?! zdzgXAbvBGN;BBcB%^D(W3kh3G2rN3X8%$gyTGMI#baLF+m8{0t#j~0EO@eYDHCSG9 zm&p4kuUq<+Gp?B*ofWoHDCmlvPw9wb2S(>Tek%M?$)Ds~f zb-ia*;7ek-{n`Bo^mF0+5pa~{S(Mx41t2Jj|5%1j=nsh)OqK#*9g z;F%8(g#xR+_&yh=oreUPO~K6uvgLJBEg9>{##yFr2IDck)nCN(Q_UBN;rGHE!a~B> zjfX8(N6l(;+lEA)20`oa$|kk|>%MOy-8BNHSk$v4oRrwoJw8}%U+xiAJ??Y$SkA|Djju!)?)EqM0t73o%USO>uA@K@R+_uk zc7-X|6i_k~`)+-3bj#<=fEv4=$J{;5>BrHh#Z%R4ld!Rvss`3=HPkm4d`CD1IsG5V zZ}UNr$=^y5CiFPR?(d*!x3dh&=_KL7@wP9Kn~`-iUlGm``r^sA^QeKvi@Xpx35JMh zT1lB&t~6(QUSlD_pRygt0T0cFkM{@FoZ%rqKWa8CISsP@bWjl2`T_5GxlT0Y|38wi z|3d?|8uFh4%%?ElXZ@a#9u6ai2)P~6*{T|qrey2B;k6|ZRp_S|fMdFMw#So0q=2t< z0#%ZIxR3_XRF}z-TEZ|n{ldusEPoTzfEIW7XVL)q%7Xve(z6`uypjzaAa=8$JBHW@ zvux-@;hcPS$Gd(sN&m_{g^Vnb^mYblpg^W@dxbr+T#M28%A=b>53YkCLG37uQ)WCs zjP;YisKb-9UlxV#0OUq!T4op+5uSK#3StmwKZZQ9sUGefs}U#%k5m0z33TueaM%WD zqtKO9pRFV@U7ltJP0A>+{kwr=wxI0#n649Bm+T^``4VYJj_tc_khJ@ zT=wT90UZ7g&<%hVq*+vmnf^seO!xvy^9z>LXIPVN;30&lJm(nD4H?PsxQ3rC1P9|Z zSD>#k=;q3=vvwTn^t(8JT6@*8vF8SCkzBLXLyNONP<9$AoSVR{0M`w{)tTX`fR0O= z0$F3fK|Wv-DrH)n&d!feY#_VNodQ34WeGO(8z1Ik#*>IbhrsWprWxR9&ibTVe>(DE zd+wxIGb-e!EC0=g>!S2zGI`IY&gJ3CxMJ=CQ0ke)!4ztMn3NnYb@pu&imeQOYAK`N z)u=at}6`IY}4Y%w^?4L$4QoSQ~PKlyHn^&GnZuHCjs7LR6Oks=L#Clvn`pClm84Vyhm?Fb3=AJP^cFR`;e$duzQ3GFG<6BxP1*E^5Ze-#U;hpte8n zA_U>^0eXqiy5WDT0^-V7ldw;`f{Q(mZa1>|A6vdvOk^Wdn+l!1k_JD#t^N|+@D;DC zMGhq08O5Pz2e70n$QV`ykR-kbL(H%UAvFq&==Jx(_;+Am5u^dLqYn~+@>vEr?~28T zw4TT?aQMKe*1TfRtk<~?fz-{Zf^#dV=}8j(huM?}Ua$1Z;^3}o;d6TUiB{N?{6$i9 z&0yKh*fOhCm;LGFhJlRGcZR8?Ti4P|kzJyJJ8|myA1P_7LCcg2#TZw<>9G!W7>orf z1}%iOVOuveX|8V0zM}Wx0&wcOVf@)*)iuj@RXc=F38{jy55bF+vqA0n<=@y@9J&pc z3R?b--lY|b2?oKyADhjCU{m(OjSJ*f#S-5?FqWE&-kt8UOSr#^brZ@P^;^@+Cd)og zRksR@g)&ti8^T{qZC5(zBLcC_Y)<@2dpQJt<7_iKp;Rux{d?i|3%>_EYN}2At~s|^ ztc1x1l50ZR4s>VJo>D=O;2Ev8D|{Pc3_fDYLU$L(VDm%r&}9!0q(sdvp_^YNoBD8t zy}IFVdIC(5N~2t!2c64W-AS^ZNe`Q{EHpl`pb$bAf9a{QT3V~{EsD^&3G78a`e<#+i&QG1c@t&n64Vx|n3^8x{JPhTnYjWtqgQQc?4iZ;-1j&ibG-r%5D_ z9pupb^Gxu$5EQ=leRooAO`3nHnHpqITbd`CX`}3|?X(qGui7BjfY0Vg;#^S7v2WQg zdAO5;_j>L00u3_{Z}|q)OAd` zFv4t&CPX<^S|eljg`!AihveocGM^5gQ`8zRJ6%88zsL%?h-ztG=ekbJ;?OYvtwnnK zjTN)m`0<;%Z@BjB&M4GDq5Pg^qv-nmlg~>;3d6(NWo)$qN#?fW@~fd+MB#_h9iB)lInN(4 z4x3EQjMwUMjqLD1abHm%7b)7;21D-yF@`kOfoi)f>2;HgJ@_$LhWa%KPqHVQ^Ql?M!71URXw!`~2ex z2!gN)!WvwRX^Ht=`L{T#%0fLg}%; z(GuBM^Hn&fOQ{8$&cqn|AS4@#X|6$q4vUr61s)p{o4y}OGw$1+C2-Iu-Bo%FY@epF zpk$aYyTB`ExQ~Bl)JmJm8n5@=g=_=s=b9ZD2A_m{G`Rp*aDdlARiEk2kFG^xdbsTb z6h+8sQ$j0&GC8ffY^yB_}6b|#-Iy|^RR ze&YwqoA28BsJyvT9tv-FztV}65oKz8wIu=>gE>fqT>0TLLxq3<8 zex+sKrpRQ}=(L5!(|Y57)rsc?5mmp3&WJ=C$_kf!fz%;VB^uVADeLk(|VLy~hEf#nRaR`SRUnu;D*1 zhtW&li9!>d17DD)s5@t7U>Z_zSribfC;@!3Ipd0-AuQBkfy% zxlLG2;HLZnZ5m6z&vJ!3oIC)e39oPf=hDbWt$b|kHAr6ELs`uDU4JoF8!TQ!th&-R z8OeZpJh9WK^~+MYd{^8dX=@+pHF}Y`N*u`K=NK^x_CPSCVFa_|#f&ZMa}G&bpX4!?9kDceq6gmZu1nbiJpqkqQDZd_(apQl1!(neh`dI!*; zatbR#IPuiGe&2c^UFg(W3TP)()zFfvI-#8X;qM@?r8ZRiA>`#u#h#=mZ3L=LKUh!! 
zWnCn_O>|3t`^g9MCfvBhxvK%WaU2Fu(qKlSG8o#JsuNx=8UjY0WyJsLWp0gv(*Lpl zZ8BF_pu4@UZ{3d`r*fZVq=OHgU($F~8odqC$607$_t0+p?XW?u6{pF;IC|zKzrb{=e1n?y zz~r@71HSe-UERKzz5}q zC(1yg4m&@Rrnlay?0cEH?2v)Q7H@|^9!AGAF9!lhj+qljZy}(`#or+gr4~1!oYR&8 zr6~dwovpYuteYR;gkxdQS_rsQ33Cu#J1qaJ!kW1@4cAN{r?$`S+f@+&v>z93;yiA^)SrJn+$$ zg7=qVc_uOw_mH6lu9(~m?ZXvOIAh;q>P3OJ7Zo8S@_BSDMhBUx9;yFOvQSQ|l#+Jr zklz~J^jumF?#W6ldl1<0P%%xFDZ?DHFKJvn$X}MKP9HA9xMth`k&ts{;IMh&{rSGme;q5B zJ2=8nW~akVSGLN(jYBbr%sg^WYv0T`l!m?_Bb;ffG$hYK`~xTW^j{H;TL3E57GlWrWB8CgHzpF>3iv`8!J|5z9`&m`~%v zWMrxP5PlQcOZxi*$3xaer%fX7`YoPv!vWU|BruN02^%qulDwfcwW;@!5i1hA9D9kVpFrhcY} zUY|<7avl9An*x)MI7GIoIo6zctxkp*48e1<9}69eV=Hv{squTXNb!+5FYh*Mc!}3) zB>kr8sA=Cv6_-!0`X#bPCTNk&z6D=mMxmj|q2zO|4c$mM^GIaN8d3*^kSmnu+f4U^ zoe$$z1gA*5IsSWHRQL;hpR^!_u{x}?!v>l|myWV>?E8FA(wO_gA7K&Pqm&cxQn))X zyMGB@_w2o`J7s!g`yT|-QmnCHh*a6B=-+M0T|{8Mz{1d0ok>^ljYEpCwaqIn2j<0v zIy*8&u_NSf@93fBdQO_fE_t!?p z>J=NeOK)%F4A-?^M!wBEB@QtPK9kBuw2%Rz=TM7nLzl4o7P;k8{NrEffp>{Gd^khV zB{Mkwi3f}%Z0@(OZs(1Vttp{Mrc54!DBd{7R_Oc~QB|~Ooh0bA&T}NpT&fE2MhkZuvK(0a1#rK=ncU{$u*>9*VodMM6@`iX~ z_iUIL3HDY+?VB5_+Rzo1qhz`eRg_l1u9>cyn6!V@Ostg|K!zS(41B6Zco|X?H6aIl zmX_d+;$tCd=R5RsQXa#go_pXAJEIPcmo(%KoL(jXvZ5h<5PfV<419-c!rpOh9&ev( z6ckDeU*6dC;Ud<}nVGZ#XZC;CaeJ;U+IVX5R5bFTS(9cB()yKL3hd${Cy1Op}E=?3rcM6d}xsu~rv%Os|2+QcQbwVmXo} z^i@I2PiGxj?s53iGL4-^aZdiV2-sXD=O(s=cO}Pyh$B;kQM4uCtqnn*{upw3$3{47{6eMg-ISmlxpR-^JJRG&Dh0Y@SRJg^)z)D?uj+v+d~OHz_)Wx#Z} zi=HDk4PJa_1fo3VNvoBZvi{?QAeUwuR@4Tn+;lnFfgrzWU)uPrj?FEQ7#O`??53L> z(#|+EO%%$knYvWC(hAB#kbfsoBpi!ZoqEP@|4|H3O;Yu)20jleJbX;25d4Js21(`F zHSPlpg_|!o;&t(-b&}$V7LA3$!)C9tonxP?TIiW5^4jkg9Jt97$~RBYlk%T=R)m9R zS}d%_*Fs$WO{WV7ofbDbfbY@nW8R_og7d`U=3eg7kjY$}mm;_-*vRu|TZ?7a*Ur+! z$Z$z#^k9qu@A*K9d$GhLq8_Rx$KEZRrM%j%Ql4}`?7v$68 ztR?yT(PT#ym zs>*MWxsy`5Xn}ZZc9g`5yg(Cae);{$GnnM|-NOnw1hyCL1}0-;_<2AmSz$cr;LK!r zx-_UpP?Rp;Yq_|iUeCWhW@)5yFN;(fQt1@|g6{9)b*F`ixzH&k;zy)u zoI_(_h>Ja$Xt@M?EVc4s3lJuApz=<#N z`g0swWE}s6=ffZQG92i)gC9x^cp2u-n$3M5PmepVBUZqfGj(?n=NVWyMlwMDl%RKR zFFrpcJ(q-R78(aldY^DA5=(n7?~xz|35<8UNH~O^%_4z-99EnZek?MPzb$Tjje`gK zY429ddk3XhIBurxnW25U<|fTMR=&Y3WT&_^G^BBw$c8d2e42HbYxHwQopu8klWUnz ze4(soU*s85(Rit-;`%)^95)C%50@2pdjAedMXTv#4=JPb5mtK_h0p|1Xe^O?4x?S+ zvUj}~;w=>9(_ANdeQ>v_8`^bxEY|Sp_eVv;%l*u81gCFW1e@;nFKnX))L$}l%3*b2 zf2!;mGQ1RG3_h>FvE2wqg`-JLIeoqXp1%Bri-HoG9Bgkpk_AaQD4Soa7&5sOaNp~asL8xN49q!w*%(Ki{>9_t75rfQGVVYjo7OER!ISH9E^tJvsi z`b`p1diA9Xh5PdR&sXMg{hDhC;V<6cptQ|ovts68D2Kx1k z$9jW!duVF+a!M?Wexr39_bz5lSH{tIF2gezyoxhKhus=oQ+NE>2G8Unh_5$$Y z5KX0dx@pzrb_`oI{QWoP#6oTn`rr={lq%N$@f188h4i44hdv)Trmju?Y74+1hjqx{i^Dg;(Vz@IswRoEvXj)I{R(S@*(PsGQx1)?owF;cuB-xeZza z=fwmC)ciHdKXA1r)wLGB`A*e%Ve~SxY+E#k6(Zh7JQ}wP*Y1@$ujuTH1-9!oAmuNV zx#^byPr0m4l! 
z(J+iO^-DVX=gddyh8x4TB@_bEmf&7hsTnK3T{NJ}4+6-E^gnxd<*s%m8kD&Zz%N3; zx+w(T?~n}ml1ccagoM6~dRv=mmWJ%*B6h2d8e4+&{>spVtx-MofgxC5^N^8-b1yJ% zO@)0cm?d6J=hJ$vvkO0S8F~LZ{}us2{|4}`+j?kY8Yd^09dN#tw0~ci3y`-0{c-HG z)ZU1j-1*U|x)c?#O0t=;_e*c1YNh!$`~B+D5+LysBPI!I7;av1s5Zy&VbBe-9#H4< z=XgM4yD&}w&IQ$Pl z9mKW5*DHzZny98FSb?AX^b%O^0cuvJlbrV3L0qxK!#J$yrxU2_JqdTPX6Eo-CCQIK zU`{|4!E74_N4=n!gXQF+s$%xptsf|{1YNDs`8H)N_%ZlnBE@GD`)TH26=&{>gsca7 zaV|)1WH4DvqH2W>GKco>VVrT-_#YUnzJCYFZ9f7rFFl-pu^M;U_@eWjbBctw;@Ii+ zx(2v& z0sO&pZY_?brl|6!K$jZ$Fo3#m%0KhZF9ZCOS)LaZ?eN@u4{M$4KlS zFpUtk+@(fWjT{r!P-5TgB}IL=8`tx{2PL286T>xP1?$;u>dIN*_gV3Td2EYZ=K3E& z&{JN&yLxKXnd}BFoE&kxo!CEWJvmh9xD|hAB6i3$22j6aQo94P$^?kC+>i3@(|jGU z9&5S$9Ju$m{e1G9+%e62Fb|Q}3Yqh0yCsnqr0#Xm_J5=bCdm~27A*ByQeOi}&SAy7 zk@{%I@Y;9YE@V%k`(2y8+a}WFa$#!r09MeTn9?Ss#xE)nW(&Zj<0{U$|G8{%h+k$y zI9tlQ|7^^Idz*J7b@~9KAp~51@R-S*8H@gnAB&i82kz)EgiCQo^nCVB^E|S5QlK&4rBY?b2b(n zC};^>@yd#Y!M}oDEgS>tL{Y1zNKLa?S4tQ(vWsx+&J99wgmf5tt9WnH%r!N!P7Ql) zR%&!jGtEmA`sce^7)`Dyw|;%R)x>6;k3nTD{;W08G-r-wc1<$1;!M2>@t_vM4sicq z`&+Uu{P*QewFv80^eLP zo~K?~9qk20nX>U5hy?Z!U@?t4^!8*I7F}XZC#Ny(OWzGJzC6Ra-UM_ZfdM=;4g52- zJDMf_W38T1d{N}aD|6^(1tVxODRMs~CIe@KTAD#MCXt29`jft^d3kHFqfVzR%;(i1 z*W_3VsHe-r@NWa{8dsc`Gq@yrzC>5-oMU5`HqsGR!L~!9)F7+Xb=BH+^D_cMxh2xU zdH3NFf=MIKsQlK73dM-G`_DCw4i|f2+c1JK;rqp>`)qb%oI-2vQH15*Vg@JNak9=0 z*h%tw^LWzoBMsaZuWO$-=U7OE@yVX*hRs;EMIzHd;VxOkG5D;li~sa&-y@r(OWNaZ z1Jqw8z0D@mv$hX@W1Htnekm>B4GTSOUgatdPq?ZuPg@0{&hTW}90< z@>MLMweNTl3%@V<&=jU!^FreUr?6~>Ldnz!T7J_cY2DgeoOanRNVv3}XGECUI4q%F zOk>RbAbvC=Tm?Mr(IC#Yyq?*vAt9B&uFnkBq?Y?h?DB#z4F(-MQ85*)x1}OYW^)*{ zi@u_pJLfC=@%scZp-iiqyIT4C7u)n{E=sK1^PabDI=w#<2*%IPBK20U&5?BPt;1ML zR10`%dD)9c=th=b@{Af}Rn`?Rb$h}IH>#mq-(MEYEich=$ssRg{6*gsj&)Dzb@G=) zePSDTK(JnNzRqGWQTCvy@VpwIZ=vREAh=oGfPANMX*LK7cCNLVeha_n;iO96cKVsU z$2WKjcOLDLg1(q~v3?1;MPmwdsWCwH&&Xe6#XiaX4=Smc%KvOg=(&$Fg579}RFXJU zyAs8CWg@o!#XtbEx7bt66>K$byeNS0J{29q+wi1rN08gMLQovv-A$@oJS>cAO_W24 zY!{IDw~H@(05fezD20vNb+$f3Oe^p{rTnD#a7JLi7s{9y>#925Yjx(p&p(lEFQq%P zvZ2X+%)2V8{)RDctU=kGDD@Su4pK11ayzZKvnE|TLvcbPJwwTj6MG0(asj=|FL0F^ z!XELSA3YMD=!v*Y1Smo{&5c^(K@avkfVOYO68r+4qPR%9>H=0rl`a4;GWS~gO(VX3 zvC8F!bx(@bvEmdciDFl?l zquPz}N&zBtUzNL!1pe${Y^?JgI;eY{-s58-V(lZH_PjH9;+$O;a`n`v>rbs-oxEO9P z=|pE%JrM;d%E^uFG&amx8AVFE@3m+UuRc7hlZFPS&W#&LEl~rm0WgKB@?>L>>rc1g z=^N(nsmSJtF=Z@On%0L`;h8R0o$vg{j$-*$MAV`UY)FF^F}+S-pjMpEN(Dao61U8; z(S52-wshu9KDuB~6(!V6=wI10iMT8Hsr^lW{feOGm#OrS7&f&RA<9rVCqB2s2-;Z-9Yg)LBr_qJS`$LYh*<3a%!Z9OyP@}!f= z{LM)r=SU8yj<}Nx<2BiFX$U}=?igPTVi#KX_wf%kDz`k=z~u3Hh3=Op{=}VcNA2-d z8@Hm)AD63X`fbFe>$4%j%UY=By33p(b~0+pKY&qvVE1yM@%JxO-iwuvB{NvEaf^6b z;n@do;*3w3`{k$0!8#GD=Lh+tiEAsDX$jbYJKNxxX#BJtH>?4b8sceHkN#8nD+}2% zuGpAhr#P>Dxu;%jrv0I!2G7O$6Gm}0HcR2+5m99u5i zBh6EJ&k9bUE|I8{<6q3zwsC6pmlK@&Lv6Fqv$9c zfp-`@M6j>CG$?LtLInRzj%6^=+>zyVhy@O|ynLl;gp!`LHN??`X7Yxa)hs_Vz!t$I z!GCRKSK{JjNz@m+eC*R2Zf_Us8ZzzrNbIfErZW+Fanx{(@wp_|-QJsJXkA=KIxq1$ z*4yu02L0QC+g%?EQ#Cu}MksTao}|kw}ak3a!YHTi@r(c+_)U$=Q^HtC~R znZVExRa3Z;glK6uzjlJNky3jhrdrjnE5`#!fJzAp&vp_f+c9yeP>cS>Je4U`--qom z9MAxPY8Km<^j(R$p$t%T#^gy1*=d{$NKNA;r02r%aF)#22K#h1od(#9iMpov@I&BG zB)-v}nlAZY!|2J&;agrCaw) zdOo;Iy+`!i@fdW~WbgdC0ImXprx*e_Y8|Cd8Y%hjmBZ}GSDH+x3 z!y=#cNu29AV&E?WoC;YK!4Q(S&M3E^wSK;_xx_f@e1wtPBX~5`p6*tZv-)Yb4N73y z@0?p^UGRa`3YyaLBux%oJ;35iO>6J)9}bunzhpDth)1^zeI@JCS->!2ytp)0qc93F zUQ+atIhMlq-f<`FD;F2NsS|NJbvrsnc-v~`d`fQ*6K#?7KsHnK2%XN3om%+(R*~vT zu#K(=o1i(?AqT7KaP%bQ4T}%c$j1(Wej$~^+je2>wSk6N{2ck7(lu9{sQvU+^2*5j z-&>$UzOk4ii_;L4&mO%6VW0~(B**BgWiZ68TGx#8mSiPp^}4CZz@o8k4RR1Lt8o93pC1c$!dmcy0*S^HhNUxVNjU%j#nKvRUvp?Id 
zcZX%m#WAdvHy^$2wh(gKGa5!JPIvc_jc;oL*l+KA~97b=s0V0)-#MIVG;xXpaPn}O!q(!T%<3q{+qiv?$&JOgMh9i@yKDc zom+D1ZH_OA^7Mq}MvD)$Xu4&sc%~=vp%sFg^G-a?09ckj_KTFN9z}hfOWQ#Q*_{WT zoP8^@kxW6Rdba8`&oo<}ulSLsK{*a<_arHn2g++OQe_O#rT}8IqZJmsVMRV7#EK%* zl@UL56p?r@*R6;eNn9w{jd{NSwT#4kd-ctr{q_6a0U8NCKBP)WH!e@11OcFX5O8L= z)lLBTfy`Thgkka|4AduNnfcoytj`Y-VvhR^uw$k9mij3RT@_2d2t|J3(9})w=~sir zcfIG6pcQ(|`l`*Z9=lTLd>vQsfm3N>)+Fi&EWz4J?%>!fY?He;*{@W8Sw7Lp1#E%% zxdBes$cd>sWdbFWp1=$~z!yIAoa~E>gt9`MM18!2Hms8wRR>nJx7uR0)HMxwFO4`% zUR*Az2~be(#8;o%r>_g_990vOJ+tYng)Wz<=%M{tFcQB&N**{0Zgg64*=+fY{gvL) z<28T}YfK|3ZHrISf@ccl&XiRa&;K7q&G`4dYQlguklp%ih0Ir-q^tN3KS{zdWzo~Q z3&dIWL4fIroAFnja^(`A>Y|+pkw26zg{CXO-rl5jk6E6XY&l0#+kQNlBbv}QHKWNp z#ohCHO;)TfAoi!2)45I#=Jkq6Fh<0Jt^OAXsQ|FZ9#X@Eb~6kjf7Gwlp}4cu z&@3*lpc5oO1^s?}VinZ0Wcj(N`nB%OmrD>YlLBe#MBA0g-z(a}XvE~GEAn0bdrax>3aaKTvJ3)^O*@J&m%N(Yw37(Vdu98WJtPjK7p<)^3N@ z6wmiC-^{>1&C|RnFFxNm?{~ReSSDys0ysTEZ9BpXgYf)NETZX+DyzkE^E6$9`sp)D|@IAq%@Y{?i0zwZ8Mo zKT`((70!eN8H)HQKY6cF4WPNoUva-KH+j&WXy@aSCgb`e;ODFHZ;^ybYo05n#bA(h z@M=xQO3l+%D&d!%Hl?O zS`}8xrmWlZjlgvwf|WT=rTQ(e>Y4`ZY8jWa%&8GOxy=K|b7I+!KLqOo?uLp&<@lR_ zs9X4$)cl%{q)r<2RmGdqk$sL{IqkkWl$wVpQDvdtbO$&I%|zmMgpXf^p625l%ie7{ z7-$EzW}J4+3L=6j4rZ~{b4euwp%#X{B|L?KvyRa?Orfr88izDopL0{=zq3f)j|Oq* z1(;Tlx9I?lYwV{wa6PHtUVk~yEd;5&MQ)~~SLgGRt35D!@MgBjj?qgWuM)-lAYP%J z7%RS3hy45y-w5}zn6A`cOw8YyqgIPocyU&(s6g9e14}HiLb(v9iIHTi$r=<^URx45 z2njz|)2Cg(&Z$^mdTYF{1Ga)P*VPpvUBwB?pqe(jX!!wiXVX;V3+Mg=)WZS%b!5!E-?6JR?JTRhr4bXZP`9Ir*6-LB`?{N6$v3_aeSGAr z`fW_#UvF1F#+rf^i&?Ck4c9w+Z+~-d?R^q;9jP91%vjfqsG2wEOoQYKHz(W6a}y&} zWx|*fOx6s3+h^vsr|0|p;#XmxvWP|tURi4SRoORZpzeiw{l2xqw=vEkedOziRvab< z-?sRu00>DbC{4b9KC;orH`uX@*QV2p7FZYN{KjdXqbYcyCHToF{?V0VrA#Ww4$|Xv z(Up30LKyjXx#Gag%hfT_TL-D`omqu|uU1T297G`RPCfLNe9tJ&v54>`J{H;VBumhV z)^+Y*gAVdDBPkUB`#;4 znc7zJ=KN1unYU|ou7{s{S6xmmGOP)|GH<;3U=70HBz}BZEk~l_dUkaNVRcOvX$$}x zGOzfFGgZQa!xiHliuW|@<=`oq!((wU2g>>vGDx!h$e)=CV^O%e#6>hI^QBkMju91EJ4p!4+FWl0E;SKefe3b zk4obE!d!ao_f<)(>9f}{Cd6Nk&E}YPT{L+amh+WdbHMriRlmNyc%l`cf!P-kuu5tQ)m%4fgc%;Lf>(q-a-HO zcW(TmFfUA005Hd_`9>{T`+&XCJh}1G@6UoR^(~$J2mfBD!Zy15a(~?xY)|9xyB*UQ zIj4pwv!GjJy<3&I;P*61%)m2Dbu8S^kiqF}7t+1q>zZRVSvfftiDEm;{8keJ)MHKf zTyS0HQ_e!qaNS8QktZhCn$c126Wlnri)%*1t7cE820BouRs#K=WT~5+8kDeJP>)X%6|=*+!bEbQ`(Fp{XNs zXzfiu+ks=2TptMfQ$T~FMNSg^=WR#6+3I$@*+*XP(t_^dZauONd7w8&?nYq{SN8gj z6{tV>ghlJlt5AY!Pm)q#K>yQnAyJ1I0s-5MyLm7Nc7wOHqUh!=jCw2q2gUDkcz!=N zZi?1m>>cH>+jYn2Ot7CdxEdZ}3=}x+(>?QgE#r5%)(={bmH>U}Kkbia*2W!@_}hwk zE}l~JR*95Vd9q%;i+YxYmq8CSJ@UYLoLLoL+MeEqwqU$ftJR`^prak_oLgmW#E{N*&7pw#iggRWd zef~u-YxI~;xLH64yB9RR@oquHaK!#IPB45=iWNP9SlDseQ%!N~IPGrnPBbAf+fZ|A z{k3|X$r;RlEs!*mKeB&1y0)O7qF-?+$+T5h=&W|*jp_I?VXVuU8vYp^+~`rSo{e&!lt++Y5;amSuUT6c(pt?bDW@N*CXpy_Du|%!!qd@+ z<^kz4=MVc^lW93d_f>e8b%tEzY8;fT%ybrf%YBHJ=NYNM(GgqN+UP1!e)U@6g7hx` z@H>wOrc4&d>$Z$wP1w7J#Da5blawLf`NqG^chA|2ii(p_pPlIaht4*fs?0skur{ud zl%b04`D+#t57fg=-GVnGCz%7{QB-TMjD@E97dCz{dF?NHnAVt!$3O$&wa3u#zK=}khof7k?}*d*ePwIp^0ZbBj8wqN6GkrO5wR`ygO62LgQHn*`9fjWH-`#$4n#^~a$FWSIAm61 zTDt>kDnK^{e=w*pYL&f`gWu{+jM#1TMwuMJ8%7VDD?Ze$;AuV=ua?}AjTKx*y)Ra{PG<_MKII9hudr`h3iSA^3P9^ zNq%Z_eu_G5SetZZ{}*3x8P-<2ZVR`R7Fyh)xVsm(;#%Cb#ob*?ad#_{7I%shpuyc8 zf~2^UBEf=udH1*1I{RE_o%3`4%9Ur%=azBbV+@)X+boVT5kX%~D)Y7TNO-0m)CgYY zvYw}A<9KGMF#XYa^FwD=p>W5BV0hpSv0XOjr96ooGs?6qwmi0RxAC`+TZg>E4IZ9SxED^r9qBTP%+}FC;UE zj%5KqoxjE4`XOBbwB(Oj2XQZ}UX-?fGlpfr9q=xZ{KogOdEiWC$lv|yk=g0@=s4EL z!wM&mo08mula@@s+3vo`W(#Y%uI9!fg6A%&eAWA* z^{A;jIy0_vRYbjOU4Hg${B!~U6gK@jYsuyAsZKg&=wTeTAy#~g>is;Jc`kB-(N=}= z8u|6tZLied6=n@9gSFGG8Xf z=H&=KJO-Ba12_AFO|C`Z(z!puFD?H4=`;BQ3lwTUy0EKrT>7aY4>KKFJntFz!~Fs^ 
zc>Yh*%uB6>?5~f0Ohh$ZUs!bf7rvo$btMH~BqiowPgx|>;|>C5ILT;Fh=gw2K_^|X z*T?(T%dIl9m7H`kKfj>*6un5F9^DSS|LpY@QSTm&fu~&kTXHNbE2V-ounoShJeb^n zzogvRCl>& z^#F0{6u_oyS?0mbkx!*0I9Khl;ch+84mNzVvs?w965`e!#q}ah%h-qmBget^K-D1D|^YOZ4)ACHtGG=qkTZwsK!*AA4|o(RQIZF$=js zrPzLdpCOYWUUadXTy)>$L^#C%aFg^bCeV4S;%r-Hj@?lErr&@saN=$%0_nPnIFZ2U zXeyQW+6A6rQCQq;5qB9txNSu?W=8}+Axh_+JV(%&*X~v$7+T;&d5{(sh)}@M1IVuX;gqOZ9G>XT| zWv8jw>65&mtK1OZZ8EdzE*Q$t?fApTtnGl~!%jOuvk4mTyTZ7lzaOW2L9YSh>hJ^& z{dS#4ZqU@7Y2k7J&s%3G`N;F} zRUhJ;FDT(Fu3GcFP^~(tImkJd;>c0(%S_0OekkKlXJX@I+6Lu!G*!r-bT_lQFB_}l z4+Elo2rn?V_-Q$ga`ze8mjL?HD{{qYLN?*{JaO~&E_=$oGh6)8fvi+p#7HH!IJ;lJ z(R!j6VU#D8FYHBgsFO;HcB8e)Fyf6cue;@$FY3?f%Ca6v`$1?pJ4iuUJzgJE8Ga@O11+&6_3Qxm;`t&O6%HR|81H31pmdaukPo) zkk=u2u8MyZei{B9c95M;gscxueQW$C`$YIMHf}V<&a5YH97;iZOQJmFQ~8Z8MAQ2c z&%n73syxfA<$QF^$K2KZ<7>v(v{gU#QH5%^A4_TaNOjDeiA&7|B<4IG`Q|vbjgzU? z-%?|+tS8Z^Xg^|3Tav6H*C12Ac3+pEDUzLZcntD$(u1w?FD`7SdDu+$o3Z-N5O!@P zy(nRQEHbG8#8%u_UjbzG>-5Y<(3&+YF{rRolt6OTgAlaP0~W7PwYR!;4*8B~POJum zt5F^fQN_kcQTN}h;%a)5z9lIJyz5t1y6f1-F6IS%AIggjmo5F&rJLZJfc}E$w5LmK zgMZ98{P*K9tFx)vb8kqKqPc^(^}}~0ziQv_pB+odd9C8b&3xlw6U#z)h8iKT`+~Xq z!Rr#xOw=8R9jmm50sd)b`iO%<^1aLPQR%ZEy2QY-?{8$^X0lY}rvf1ci;y}8qoXdC z&MjzOkB6nWO|65(tRB0x`{}`D-qf+bs?#5rmeAbCAy94wuZU%A4)f==?SLoz6-c}A z8+``OLabZ&xYNN7C;X88#-(@|r~6~^+I@_bDt}k&!{P*wFzD7~t88(d^2Fm>)}90l zW*FR1X9Z4z+!`Z^80iLSci2qk*GHXh0xbgQ_P0g4k}P?l^ur+s(9D^jw0d2es=#OszMUI8U+Vahk7`^~q{Ri_Tm!}D5Zi%^c*r0a{4k;irZm{S9Nz<67}o&4#a z^RsR1JGyY%P^y^E0q*4RjxP+&Is37X2feU!4Mzu@bpdcj+%e-!Vqg|?+IGwC)rDuJV+zy`5x{NmTi6*~1;)c}!$SKHY{sG!~)N-4Y2ZRVat*HZ+o(5*(Q&_$7Lc-Lq1bn1J1>EhA%braXi>aEvz zjrcnHolW6%qnBB%Z*rjLi8zajxr5y6!h`S@wb(hrIl{G168&|a+kUy_{ro^LVPWf_oem@hH3RfjAJ>ID{G5-EvP&tC0VAb=`v&m> zS_+NZ0wLk;n;BKI(g}K@DT|Q(zmesWI*@kXey+yrR>2kmiU%CO1-U}kv29aYG@eGz zO(qxeY=RNBz8t0A0;vI~XWpHvUDds>jp2=+WH8e()blJ?e7+VWa2Ywz99eVn z*c*Qf1;L%^mLt(gaK8(2hSk3G-oNl==AxcXan7=S`6Z?w^qHDuxP(c!$EPLxM@-i%fqtKt zlWm5%kPjjfHl|4LANnA)c00l60xt-rbZ&87&wUWcS}l_P`TL0d{p9h%CkiMap3-WP zQqnUdTEd-;@l9P!#NYQh7ChLR)n+N~W=y13(cK1Ycp=ruqyU7g_JzTsWgP!!2_>8x z%*yo5dM#9fMZruv;9q z-qx9FJT|UW@*iEn*Z8`clt@!uI5tiq~)UTZtdYVj2x^e=o~mh?L_&;%;j z;@J0vpAy^hMNO*>eWpnM=3DI~vY4mM#@Hj2** z`(ABwqScvj+gWt0-k+gK4)(b*>Lhatd%5qcRS#1ahZp!)R|PN}#vL0it@&55n@!bN zi9*&nE7i7yEv}qAR!%WE%7Ys)QCr`;uYwB`%!gtG%Q?4BS%FOXb@0Dm=i5pA)z`|~ zjLW}sc}nY=-^!UpZJ{sVp9xW4X)L6Va)C^1`C}sXD8h;BNYK+Z!{EIUym-BhXZ5j~>+O>Hy))||PV6P7^Ttt<&AUgLOJ>T6{l=l2 z`1{I(^Go=m81Gu%_E<^FE%Ih&sT3{#rQe@jy#Hm`vUpa`}dU0q4 zEpE9vZ&a3YrU)+Mr}{R3#9=Z5ru@@--n@`bh$`|T0;Uk8sGR5-V+MbqhHN~4fHaSE z;AF)}@0<~OarI*rV0e_#cF6B1vwEMb5^lDN}M?D(k1=_PLl1I~4ctd|A0;d=8EX41oq)E&9Veh5fqW zS!94yCIWBr#e&_Eq;)-Ir=jy#M}@B!W0~ ze|@=Ftcb^~)VNL@L^>n>*zAglij0he^fL78$B$H0*x0E5^+*)eFX6GJ`n{(k=+m9dQx-m?#JJc9$cCTnOl?Aiq%Zx;<8!V8m-=mH-b$}Qnzl?= zw6|pybQ0H;U{t+HL%EY-y5H9fyVP36Q3>$n+w_M80g^>j0q5KtF z?ZQVgVQh9ZH&tgYrX6*$AVa1-_c-#G+O`MWT!iNQG+)D3QQntAec*bTbe*O@l$3OQ z&Tj&kw|QvayPuO?;@H+Aw~){p+_(5i&s?hN2&%7xjNW9sGH3uW6`ROiN(&1>fM$t= zzEiZL8O-Gdi7gK|3rk3HokOkPy{^H1!n#^L^q!RUBjDVv-N7xOm^6yxmv=42qnoi( zA?M=9eWl!4klQ1V>%0CN)uxb!HN*NTebQ7`Qi;p_U}H%u*`Ydz$zAHp*StsP221N= zJaabuwPE4r~XQe|Ti=Zpa1wV}<_ z8qiwD?#SaI2lIOAVNhEj$Qc57_H`k+1kBR`61a<939U#kA;4fDo! 
z$YU4Nnkx8L2QLhB*j1^h)cuQ)oZI@~@4Tvsir(J>@prc4vBQ*w@x&{L&lR1xswGcl9RTT)jwf3PWLO$+{Nf z_!2YXe-dsU6KlD3z?4C0xxV0D5=UybmG%2vwSat$YAM%0g!|e(IQ(^OCyGJcL7?ZM zIo_Oiz*peC4e}DP;G=E++7mvqI522b|Dj{fM%!Cg?{6C-=8w#0wn9 zI~#mgQ%~mA=jZKrcYzX=r__t7!d5gA%mIUqddGwvOBpXY==Ofzf0inAcpk4QZB`s* z@i=X2TQ>alIrLKw&j8SejN3dsE+aB`NLz8u z4Ta>>%ze{l4Cf>$C?s)F`GL#{MK7a2pl@Qg$!>1Pns+XGb+Oh&M7~|JMME5HW*)E^ z@IPYMze8G^J~Gm#cl~gLzYSIQ^H=!u+aXgm$C$9#&_-*;dZ$!0y5JG?&@J>ao2Ra8 zlGAX`4B9{OHs@?PR3M%5t#_1M)t^5g*~D|CXHgH58`A>NM}?&m#Sf}PhKp20C9LTZ zc10{Oy@vP4H`D5%b#SP#-h2_8mr$ViQsKzb&9)9jB)I@XybP#-bHD5OWbnO9+;87O zLA+65;HW)Yty8@2m7);t()Oe5D$J?<{`1Mbj?H`#DAk4Lx+b;@B>G02WY~TrL*ImX zP1CY``^iAGV;DqX_L;A^yk{BGn5B($UvftcOg9ELEvt_FfTE8zg<)|`{uyn{=@9Wu zd8R5`Gg#V=aqNm5hfLsv<#%wrGVEmc={ZgD9SGNIA_pKAa^GL*sRCHRyQ#+~o&}^> zOAo?rLbloQ13|hB7BN#p3zndQ_-!e3aTwF~o60Hs-h`xvcJD#2`(5`YiBWk4jHOH{ z)-l~(aOeHWpuv~%_#m&P^j)`eo29I0JW58hoG?<3ZS~j|HKMFkRcJz`QAKxS>fIf) z?)V&rwo^(9mtc~U=R<9Q8J3y&hTLn8cGXNuhm7r?1&NKj9sK&7xP!a5?>nHuWascU z8Z#?nj2c=Q1#72)Lx)OYN3mIaG~N+H4$cNE)>x++^dl64vbtJ)*Jdlrx>`Zseg~R2 z-Gi#)$9%H>{$qxDSqQ=aLNDYw@wAEOUj9KP|nR~y*91$kc5=iW`r1^U+m zk2)Omvw&(_0cq|8zOz3P?bo31i+D9X*g+86spcx>V%{SSh5~3E;5^@PFfF?ANOq_a zw+zo$Yo7*B+J`w7i-HsicT3 z;O8={y@rf4H<4?WpW^2j%PZBk`%B-l)YH^-@ug@^1mFCBztcyIuKrNST z_i@KkdK;yz2;1H3_VvSRz-sf3sW6xL8`|>ty;1dktKjANBgwjt=*_waguo->?&@@B zHMD7NbC9DGtcbJRY*y(J5jQrvatXr(=>Et~8(GvnjvP}kr)qI2qFg*M32s&xj?`?a zgXo*16=#dJm~pVc0$p1jfLXy&``d^5JGK5y(YGs(Z@ zop~f?1x49H_$Y?rn#&m=g-+lyeYMjr|9-iQs?j+P`t4J#8uQCTpLZ8HXe-TL$GT^~I~L63TuCi3mGOLQ2|W0Hg$qw!YAlNWn2ntXT9 zAZ0uwk*V0cAPQ23(-;)bW(m+mY5Ae=b7W|6PJ9z&Jmdb0U%ivQfjM)-+XWnwC6ZR- zpxFdL^@(El;g5#WvxgnTHl9xq*?{_EcpI1dz~9!ZB(rDSK)NdTtS z{0?QuQgcK0g`jr%;*s+-t4!twjIE!c=yRWT@9Xl>kD0c(s*7ALoYn&n8df$}PZW{< z)Su*T43}+)zOCh0kte!w-3d(LY%Q^fuL|(SI+mhV7DdTa9~#Cjjw~wmD^sSh|86fc z!T=Ih{}kKCJA01q;gd&R&l`wnk)&HMZKh8gkC;BC-Q}%S&UKJ*)4m#ex=JuU%8A_a z*v#(-`P5HeISbR>l%vR|Pt1Yv63jE;SDt&A_G6Q)&$IF zHt&;nj42v!>?Ag##%`IPam2@?;6es;BrhT4 z=7#^RG-xdK*d%7x(+ME*5Ce1cn>9UOU3rQDs`RjflO3PhvO!Y$|68T}-&w|$|DPPg z?S1QABfF9y}^`PSR+?sud6k6wl^Wudt^|+-1~YdOuW8TUCt>}#CfZW zT#Sw&549HkG+za{suu6x!wgREjOR>Cn(G*KKGXgl zBR!V?(Al@bNkQ5AgXnHZ0$}@hsDA~|6yd4pT;RCPTT92w!*k{GUGP;viia4)UObZY zjFp7wd`G=`MgsC`#T%p$>HhD0|Nplzh{x^$Ds0r&I7#;TxBu1EK&X+JXm2w_&#{zu zOwo!A8d{Z8%0NtEs~qTF5~UiaB6;4nEL4Bfq`wBFkm7zTRuL$9K4U0^6Tb2v&n6WJ z9%4V*VB&NM-F));Y?2ew&#v&6B%ol3n42FNS;ledFS=TaIqC*=O2FqNeC1u;@F4|2 zox=(Dg4Ugp^D7 zYx4whDt`G+SlZf8iq*~VB!QBLJJI%H`~K$;4!QI#*?%`_|Kkroh+?CLuTELy_p#Nf zBjcXDC1`vIJ$+B`Xv#q;%CWJwH6coo75m`(GGralRp&SDvXvcZX6$o!-|br@Csb?E zak?|BU<06o~v<@GYAn5m>c) z5$mS4(v2~(d#W!>ZTtq3^W19T8l3wvsd;;*&>Ni_lH@W42=WWLUJ@D_mPqntW1PVz zN1xW{=Xm11ZM4smT>D88Rj}^^eCl5e^NJyl`Ryl#rk6MayM_TNf&}OmpT)a|u9pre z3b&QVMNBF${Nyg{`lmERyJLbj&7L&YYR7>cEA15~98;MGyij+Nxs0~!FRK6JSpL=7 zqKAHLi^%``n@0VMXEbSkXU}H9>6OjApmoB#c-U*{$qZ+Ncirx5yd*T@(6^(wfXU&c z?Q3ab?n9zJ=9o>HCDwN5;FDV8-ZtP~Y8Qpelo{r}vizU;$Dex-!hXY?zoRHYuLT(Q zaG}EO5EjyeAu7W6?hi@wOGo(xz_w^49s9rM1K* z+*Alo)uXRWCaD*GujxboN=I+GB-`788qbl~47eCtPzxL|-K|7WH~ujeW9M~Q3RXUYe8#G9D6d!2Xx z2SZ$M_JPBj!)DpP>ABD-nSW|{>4x%Mhiw-~Ekqmw)U5{A{jEV0r_JMh9U9wQY58od z)FI)yesHbOi8aYjC=qf5_a|!C3xYeNpGO87i9;KN7%^40YlsDi>Gt%v|K5HusjW-1 zeGoI@1D7A4#@lFB2RDr$elL@dJCvqkft4eeb{W|x8E_(^K^N9B3D;?CtoZmGo? 
z;~^H3OB7fV=of4F+n4c~Fe{cL`XIhI=i@0IP8FIA8`n0GeYAYXY^km{C{eLZh`+qL%^6) zl%FX#s&5}J-sl8ts}|5o*U(vkf(lx@bbli_+ERyLN_E!;+M zb85BZx~05edU~djvom_@+JpNTD_2LQHD6WTLwu8cW}Y$d{bu2tRQ1Dya~=`b^7-hw zw7f#ttl`4UvZiM69$MztxsqVm&G10sC`hpKu@6{Wyw=dzBymJ%q`NoJ;#mP9!DTN>0#_)XkoSE@O*96||d}=v{Lm_H0 z3CsHeAR@>z3YM(?P18IEno_B=ZnkGIupZ2L)Z-gdHkqmf*BrG|`bd6pT=VO#)n6{L zJ;6ApKlV$})>&{dxd|sEDe+g|-`_X-^5tj2ODq3HIwNoI7As3sV|uepcOVd`tF65= zT3l{1T5LBHNTNsm6m`LZO$YQ5mEN^(g_8l;OD{BFn^9$F^o%5=WIcaO=a*UQ$W|qrhtxl{LH2cW4 zCmQtpx#D!)$Q~szeWk}@cd^}%t;-r_pGnR0N0TcqG3-zVVJWL?&bmk#hs>7sa=qFw z4L^Z>0S9ZwJ?ll^6zFJRi@3(;k88!M1PW%BP(!)oI^FRi_r%eHs<4Gv2+L;x?w2OD z0FZisgfg9;YD@H89-Z=%>CNS}#}}xyz6(Uhe$~@)={NG1H{5F~Q(1|pn&gMb+@IKJ z%pefn8R&_}y?9{Lxc4bXb6VLb`==nOudSWJD@UeY&Z0%9cih30`?eCv)r@@33QOw& zS9yZP`wEgdRyIDZ-Y3hOiHMc24)LS;if!=Oa(z*=?MyFBcBQjZRK)M1 z7{U3S!ujXDH4G+@c5vUjy_scif4}MRpDxzFuJxrn6}Iz!k`5CI_U#N#H`57vACKpl zw|!hD!OFLi0Zm2i#v{#EFWWS8^3_=VSp^fJeCDidmJUMp?PIyRd>3*vj(UuceExj% zfk&Zr=?~9W3v-vzrd21rk27zE3JGNSJ%UrnQ@^8^O#TVB^B%*6^ zWRmx6fBSoVYNTE&7P{zbC3g}4TU*^deV33QgL@Y)G40pEg&_k%;3yXffA)Gve`o(g zKX<;`UBB4w@$LC7uM?xAm=CpK@K_>lZu5`f7Dpnxs>j(DtJw7qqSAC4N+mZLb-99P zwgB4X z$cjEV?a&F1Qm|Wy2xOfW(`IO>&ct_i5-u|oNm(LaEmh$2H$2=OI@=%e!T%NjEBgA0 zRN3)-jYfoRt1>vw(qWEc;NDXWZ!_=YZ?P&LpDg(q5unh?t!4`}Yoo`nxtvvEyrthJ zcd>Lw#y)jpE4A5bX0gt&mdVD}!k(*K-DU_}aLINolDE=_P^6+1&`O%#AbD{1&%uxia13`HHk=7sv*JPPR8% z7=PJMe0sL=do`oXL_t8?xqON0RDICvV7iBDUD)UBk&<$pO>&V*zY7xlXI@g>UF-ba zvBq-vY=Lj#KDwX!&YghJF&<3Zv@K^JqMFAX^qFPv_`TD12HLhQm5pL zSKGstqEp zVLMW*)VSu)dpsT6Pv`-AH!F)x1`Rq>O76>Y@bgEv>!ADd+s*@DSEvCCk!St6wui_5 zWtrK|s>INaFiqi}&ItcSyJrWqMWqT)@BX%h6+Dk3f%zwHPmAGuAn-u&_M}eh+7VXd zWHM|63ATD{(2m*mwwU!Yf;+6%QxaA+eh2M9#sY8V_8J2{c7(VSGLTRNBJG*s0RuHcvhY9?~b(0A7hV? z=J71rQr1909kerq9L5KztqA!%1Aur?H9}};(u9tVY=`8IERK-feP`vg72-@QpZFnK zKtWioZIcGq+-alJul5Ao>_8qGd8|0^!L)AM=(O6hSR{|tv0o72h*+C)Rxt*M`Zcf{ zwTvThlg)Hjgcea#tw~3Fpv2i<=!>rJoByGK`FG*+=i#4D=5Et-AXAABT!@D;*uwNa zYsdzDh^(Qk-B$$~^4;b6Nf2^>quAJ^skC`i9WmU>o7zfOWjaqx{&*5Ucs6zTJrJhz zxL`{jlE7#AqnR!vB*Ph4c%-gc1TI0wgHZNvYbdel^Us&lYVv%9v+ z=do@bE~FoegVJ+rpd{1qY5|nRW`=@jDAm>iL6R zxCpl<$#GWN?6hJ&WvhQilaVTzH~y}Wa`PaB+Ik>Du!Yuufv{n)D4&k5((MWkI-V_w z+nh|%cSyF8Pb72OU92{+P|o6LX4WcggJN3=tb3->t|2x?&QJxwvOS0mKI&W%pV}I; zt_@&*Ka7rpDdw@oA7;NqY0rNjxD~^AvS{SuwPPCd?c(Q;OB7_|uHeUNgqVl3WAfJ- z{3mv7S=zw=I2`@&dS0CmDb%&Yv~G zMSQlhL<+h)m)mc)52~LqGPE=>46{y1t%*r2rLm);XNsck`goj4mrTv0sby{>VKbWN zM1HWu2lCdBF&9AcK6BQaCN^_ID*B{_2n)N%AR8J)8gUo#i@CVb~u_c*le3m83D-}RyU?DxD+Q81l*s5>v zBsGZL#qGCztv2$60iU%JV#_gE7PY5e)0$~jD<*0m+e$i!?XjJUMa9&jjasb&D%cHl zitltAk5NiQ^uCX~JooSzQI)K9u0-(Ln5QgYOFpbox;q|V944}<#z9@~uYI}nlID)Q z2ChfjjEy*Yfr@i<@?Rlry*0}4xpzW?lI~uf-phks&<#C%tv!6O8}nYIX__;+tTwOQ z*(XR8EA&NaM&;bfXxh-@P)@09l#rV}Xo64?q|CwZ5c@IYI>wPysMb~})au;a0R^q9w@TVehNm#0Y|5>eba)Fj#9tV-IM&0@I;NaKch>uaxDiH8 zqE`D;0BAIDvoFdBK@O0SVGv2hnoaEF1l{%mzdxM1ggD$lLPBKO1QkTK`hUX~jrcwG zRODME+uWKQmd?=WO?;n4miqwzuLfdY=ARlg{9u&hPeRxO4Mh=v5j;CLx6ZlycoNn# zL3_TPm!G0WL?pVJTY61|MwFY7KsDwK4Sf=SvYROIR!isxn<`=HTmK zD=b)X)oPG${XrOF+Jx74q83)_zkvrb_sO73zAK5rmkP~b-_26nD4+Bh+(YFUpigV- z<&jbO+%1uzTun_d?MwZm(>=bOk&hu=0h^fFo{}EJ3U`DZpFKWINYeM?w;e^GHcbR2 z2Pp|&{jn{rQn7E=H}E3OhIzP=T>CH~L1EInUMHoWpYCW>+r&Z`Huv^Qo~ZdXMubGE zOvAZ-wX=|hld|*;unA>oc#Hp~1%QTMW8#7u!8*@Bd7grm6~#xsc3Zu_BNAFfs(7xY+el0$OJ`g4^5-{K9mI6ER)O1L_Hvb z8848>6Jy-=xI^Je5rVB9pA+=@PO7V#(;{3O86J(5tFq}^Vc9uqU~$PgQt-ozSQ3 zlt%pOSmOtOcS$?=(_*$p3ZZoJkuV(pBnJ&q&+Hd)j`xs|EnCFbm7ekL?W9E>f-N}C zrrT#R;qTFHDNi_LfwBQ0%4-Y`a;)JJmh(CBKru)H^o+5=9FYdfwlq^OlIMm9$}m#h z0R`-9E~l%-hL6JB&5Y;cgmKYEKiZ-j8HfzL5@32$R;$Slb8BG9V4$ae`aD{Xjd59RYkI>Z 
z4O-r6y&LWIn|j~4H8|Z~x9LldYlD*(B9E64?z+YQs}^kt&Y z+-~-vr~*I$0f~XuO#$s%t%O7`h8IckQYyW2UYtDm_i=59MjOxuJ)&row6J0Mk=>6oi3bI`{Fsw{X3*s){P)8XiLJ^za?zIev_WfYZ z0Q9#PZL-D^(d=t2oU7UdB19URU!xJWU8@iMW4YrT*dG$sS!C?{++p=Dg?9*@nY^%7iDcb}jQ z!DC_4Z5az9j|{pmuOhaFX%DhS2g0!p`U}5__KIz51P#iXNJ{3=W)b6gjHW-5!Z3qU zh>2qErXy)>Kk(o8Wp|m@o@OdK@k)oU&U2!jG7J>?%Y`FGy6#lgGQW}&`{5$h;z_7U zo`V`i-}$NmjtQh6G5dwr+W1PdbI&8da0PA{{;}P3jh$vmvEeEqGp58a#=IuBc$o~Q zq_9+jF?}Z$|6_Je7;Ux;>aREBS;*XaYRC!}%zKkm?jQ6Pb72uzLM*BDq$&=KLJb7f z)QQWa@uBtd#~p*5f7Cql64N!ik^}vOXFp&W<51w8iR?}$y8(ri6bF2}3+l;IaWPsG z7MgPEDl5c_9t>&9^E6q+36o)`MaCAK(v@EcxS`*-STqCD5K2H~IYN!Y;=Te?iRI6O zHJa7AYebmo9Sh#~jGyQ&();G#^jqS#6mikl&R5b(azLo1M7vptc!2S&H%> zM_Y=%v>{k=2pRR-Su}C$t;vYyly9rRlwnwIFik{WG7W8l-LTeb>KzD$u`U8H->nM* z0-09dj7~5(j6Q0*K?hJy_9uLO<$m+2`ecGXPT6OrEUrz>7x~ISWb-OW`hw3c+fmOgsj`}~L#ypm434u%iK~U<)T8FR)=Rpz z=n;PJ6a8GVfR;VoA;VVZ)8R%$SL~1vs^PeSnxnQ#Je?;YRK>~U+hk3Y{ChF7QrjZK zKZg;Av4zpmO13#I|B~cB$XqQ}Ofz;Z-bJYbs`J^M4-C9)vs^`SQZ;Vq?j zpa}PyQ7$6XRs(l8eyvJ0Z8qXNd34p*6lvbP$}s4vcjmG#N%R1-;44wN{cRqOvV(xi z@sWJt5<{XCobr6CqHA(CzG#y8X}D}={XFrihwQuj*_X`7%!`JD7cDhfc~a#HgQqpC z%8qGkycBrU?hm%Wg&Ke*#`_po91Sn^%IvhL5>ebQ^M#*cn-BA7A~fR3z-1usk!(|@ zBHtLW%N+NBFigRVyWwc$0Hka=p6Ns%OQK`m(VAiC1v8PaYxp!dN z`e6~JeOxIdX-j~hPI&_f);gQ#Q<(Zgj^@P&?yprmj_C2Z6J539Ye^5mBZH*;fu_VC2f}84z~p=6;CTxTvLS{#W2}YdrdCT97IE3p9`7D^ov%65UR2? z$kn9P48bZE6+O$tZH1Lb!YUU;Vn9aqqfbXc_qe>RZwsJsK-~q=0FLbV>V$GWSITw z*Pyk*r)3&6(SsLx(&n+wI0OGG<}GD8$dOW-%b(T$TZd_jUB1{lsOD`pRM+(EZAGGX zOXzSsKMkc;iSQU+%)rKSRXt&)vAY4EL*h||P35o_4hPK#MT9t&jU;gxT9A}x!L-da zo27&DfVw`uTChAo{TD@-!Rlw5+>~7Ba71jVJS$Il-pmnzdgq7Oa3P(es$j z*s%9^gGcN*wOStXL3xa)CUnuTQdVK3)Ez^(Eu@`qaV6oCMd*+1VCpjx`$_vzbLXfJ z*4EvRvr^my9kwnu4ikxXYt@Ggrw*g_Q|gQL0ICGTm~5v~H>ultJ<*MQLn476x0E+| zesI;T<<8v`nn%yGwC55&W-z@_ka zew=Y#y)RmTN7||OAoHCljxQP_j-WIrO zJ!6;a;5ZYYmHK1b^gGkafm!Q1x6-Zu!53b>dP8hU+MgSO6+jcm3*A8g^CA(efTt@) zB0vJMb6dL1=AK2H-)sUe)au!})J)qQ1(VEmRCufH+XkvJ3 zf>{qI0t*?)+7$8ID39DsGV8rLS(zJ1?12$<6?CbGk**JPR10>z{5+xEyUZ@KAzNm< z`sC5X4ZfDYy1xs)c5=T46d@phEFKhakrVnj$6X^xX0-$zLftp0HrlF&A5fh#oWP*z ze1%gnt=Eqs_b9Dd_6eY6skQ5j`|U*u#?G7iyz4RBPBY}iQy)c~n~#~=5}KKc294^G ze}T%eGTJepQ(ux5h43z{vbmdw4bzoz;k|X8!yw_mFICyc#DPr}yS#JST6*m*gFK^1 zQcD^)ZMVU{av|eOCc!{3@@8Pp(8!wnzQi!;*fNiZS%b}d@5d+oR7J6uxvo|aWd&TP$O30>ZCbfjX! 
zeH3jXV>r&`hNTW?W-1(>mT!xDoZeqrbPc<|<5plmZ7r#y3C}f0;8JxJd&UUlsupVP zfbLK{96H>KD)CWf+&YE+atD0If@A9>>$_R&>e}ULRN5k*tNnS*nqc+bZ$Q^b?s+jt zmd0DDep2)DBl;g<*A(!B;cl!nEZR1y9;c^Dv4Y}d@sjalA(tpe@9s1|&JWVD zYKvBFQg55@um$7xm>1F}yElCB%1RlMuIJ@2rUGw;9)7casO2Vlihd=M7e4Q)eF~v0 zjsLYBCHV$B!Aya>!y?bhJ-Tuu`YAOk8*1t+@T;Ml7<`<=86Egd3!JwmYB-bdlg45H z8mW}!q>=HCLIzm^to(kxS}2xrdM&Lw1p1*v+OmS;liWk+iYA+?e^+-RTAA=^YE1^$ z0bOrZZF*>Vc8f#ZM6T%$zORs12?O5D>qd7a3&AGI{$C1Nq|mfV#yK*fCN0YeW*mn{ znMLu-F|y-DAvrF|R42k7dB=u*-zpkY)(&V#6&)nb-e>dds zXbNJkjE=*c>>Q5x)v5ba6ArN8D11g$J`MpZ8Z$ zy!AiPuaV+6p&^+z(1fYR4Vo1ya>KD@HPPY$n#sMfuUORIR>k9IO=Lckr}gCAs$Akj zom#X%3P3Wy_{5-g(~}lY?kGlF?}j5)t;!%5#Ls@RM0a6pek-%CIIIQJ9JTi_ohKAGI7i3~NAxvi!^|B(d zt-FRsX3~^oRAb~6tY!X6$7S9rhi2SdsWZp^(gVjRK!&FfL_4@Xmt-Si^`7IIg?_C- zev^=PEqh5GurHF8T0ZXYquluO)tpli>TnJZ2#~H>X}lDrtmB(>zs{&LS7;RrK_qfW zJZF=xrY;5v`|IT40$s!})S_0cs5HYXz+%)sDam32NyES7p<~_-Uk%*~6vFE8wJ?=R zt;6iSo6~J;np&9FuH(}6i@fw1W(#xXgrJ|p>U|FH({0CN$BGtdwF;R13sC}XIXS@NnQOirk%_@I@CJN zb)tE98Lmj0QWMrhSe~u)VGn~QdoPYnQglJV!)`;W@h(|Qo6g&_?&Z|_r3PF{cYy@@3q%n>%M=h z(Lo#e^yIZKYgECTo^3ThqeIM~)oQcqIwrC=fbkJ>E`<^KkvWuNh58wn=H`_i1DB=G z5n1#T*=2l9$$lGE$00gNfa@%qkiOwUt}5SC|p~ z*>W}Byxp-4o76iyWK$m`q_s4v()OlPf#iDDE3)n$q+ZL0Rjkb4BNMlm>RVbmP!ZHO z)^K^KBa* zcbj+s_Gyu!Kt;}_ihN>gj8AYC-(*M5g?{eAEKTFAYu@W7vSL-T0SCi+Y?~Djk=s$z z5i?uv44aZ|GX&s}Ij;#^*4lLGlsI=vBba8`%H9+elMPv{c!daSspkwe^n!mGX-#CT zTP<^*$O9wC6T_mYY(18}idh!5zJXVU_KkOq@ezu~{!BgMSstPm^a?9TCRY~M5Ok?U zA6eI=JNefQx{{}16d$`%Yfm@zpO3hj$phnEvC!(cB{)6~yb5wcuT1z?P01laW3$tJ zcPMRIo2NhNui8UsPJj$Z9oQIuuuSaqH6QvRtrzzU21Kv1{wFRG6TkXTu_bbV{ElFq zAzWnl13tXQJUr`_bN7Ql2eKvb&lMmNu+~ESgeedg;xiOSFYZ4=K;aO2-UAH$nRzYm z81ThvU23plLgeGZQqxBcEZtfbw#Ntl*)#m~ph^q;@HQ$Ya^1u=GK9%trO4TS?5vB! z+~B8HumWb)KC4B2{J_?tPxZ#U1JTC?KL6CsLE#7JTu_4)bn+E{i1?6 z#8)ETP7(+qiKH`a4mO8LX2ap2ik}uAE)K7Kn=4XUH4}?Ph{k^mFb>X0ejM{# z!x*)Kubk0=+%r?8j9b^l;^*A?o08U3pT?iWEJK0ZneG~miDjDm9qo`9Kf&VP6zW{H zY#AO_h?lpC+(XRY}r*)8_bBGxs}DvFF`Onj8m zE?S>2c`N%h%7rYLJ~|I;Sg*Jzw^VdqqS=Lu|JDYIzCnw-Yg#i@VH-I6QH-7G{OO{o+rmLLj&b|!!VO> z_}1z9$rs5LrAkvqQ=;xzpJ&|;zf8+?VQz$De=2zSqDvoRb0 z25^S^tk`nCPjk-T<8H4}@VuF0@4(*ewx)zEXU8p4aI z|HXGP_pUBI8~q_n)oVeIe@$W7Aq5Runo?-m&W`49_4R_j@E&Q4@-Jftx`z(h?rKhY zlUsL6UYNx#mH0$40+09Q5&cptY>+C|?$;={9^AcTN7IFB>0;>#y^8AtJOS4|j7AcT z`}_8kw4>=)R3}oQuX_6O^F59cY`466A>QV>y(@ItnNm+P(4H(^j9y7l9Gm*_OB;J`R2Ts%HlZ*4uId~hRKrW zeKgLaQPhQZeRyBRd=X_|yzQWK=_f#@%o;P?EKuaykE3=6{W4q)%9lPE3FBbvpC8y; zjLqghri|JEnvmDTz*D#u_&oT3zrZEcj#`%^U00|H3(5>W+Uy>UHI2<>P$B=St=+$p z(H%8shF!pY3vmwZ`0aef^EIx3Q%?&x#q6j87I3$(B1n|=?v34ThVnJ1@k_i1$NZ;U)bu^H3JoyZ-vze z6!7Z-kS)mv=Yg)j%i|`g)vsXU4c!-vjVg4GeEPztV<2eIiE)%M^@geNbr= zJ2WTLH3!2GG-R`RehO<#Tx--#<@>H{o72Rijo>@y(_d(%AyM{ODOoUY-xH7N%0ja< zfHonTYn!tEVX{)ae|tgThCVuP!b`)@vtdh@zA~#GnZ)YeraQ&n3EWbqrYN1VLdibK z1;xE+nis|<+#kwa6ij3lct;It3YWnU!h<3Y25!{IhPpPYd-Sp*C$TFCymP&CsLA( z-J`SlZ2{Q%F~Kj_vtGkrnbn^7Zz~DGq7@qa_bQtfjuAavMrtmxjY*rcq;gKbzVK@x z!o=05_BAChQkL8o9jfp*^B!*a!~d_k5oQE<^|&@Ei>w0kYEbFPtSTumRj$aC-<-}d zI%DMS{jc`3KT;pjeX#vwIKzBOS?OCkn_AkdbiGQ$rbY@LgLg>S9sLa!-F+TCI)egj zRq1u1FH&Yy(73a^kQ*a%ejO#|wF%KWv|1W?mD4X+*UwBBsXQB>Jz9AmyWs7gnv*t3dd75WU)GDyyny9WTzqDs7KqH zNIT!>53M-tMxs)nhPmj@k(vv`=&bTK(K?08L7~+iqOwO<(Xj-AT-;|bEKF&Lu4m#KLSo<{MBedWAR<^8t!#d)ZG2$APn#z~%?_;U_q&3{?95B)V5D1h$ zw{L793BqCiY4YJgn#}7WgG}ewv%=+re1U^RpIZC&n7=E(cuVD3sXP~crN+QCjMcti5ZB2KuWSs zT9gQz+2Db=P>qbI#1z3_|r+JOT|n5LlUqY;*ZNIcBp$L&hW~E>2zg- zj(NgpMUR!KTIBPXt6}ik1!V7Gw}uL_g;gYN08QtdZ#?-nmyO{;Vs0k!%PMQIy9XGI zt#%V42OtT%$YO`=2aj*MK+bq5a@}IdUK1(z&E@HHGs;nbw-^BQ^N=u^=zF8oIb#nQDQzZWB z#LUg^`@7F-;I*|wm$(_+3#Ppl;}D~l0KIYPBhN#+gpmohas|3bjBH*ua@&?7h^0H} 
z&sP=+WW5H(-at3!ugXu45sh9_IbYGSBZbG(oy<@2{c8K!x~cmZu&cS4{8osah`0Rhw1zo8ChBW~Lat4okmu6S$C(WHP#Or+Z(fJ|%N?S56&H z*ByCRg2w%RtOWd+0+D+OA5O&M`JcQ?BS?>`t;t?2^&UG&QClcn5IO2dW0$M>#o#AA z0!>&{opNq#;tg{O?#d8#EgJp8FOU0kp0{WbSq}4QMM`Jr=fk36JOqYvyo$48s8W_5XGK6eLA^a%I+Yv?RKXnPB?Mr%8tcF6OqY^MS6w$g;o? z9`-k;i9?Yhg391|1F>_x){Km9uAKbD8{^&2r~+N2jCkudQTA&Sx95pp1_#tZhP5n4 zjl&6@U~G#~3hE(kus!QYgh5#cXwDGpg24ql+yJ#9*fGbQABVljTFQ%KI3g${*E2NE zB2hUZxDeu>8mVQ^Ua06p9l_k=luJdw(!9XVXzpIKz-%&aU&`I<+7f*Io~k|aLB|lc zP^vq^yhB-78DF`d1=13m^k+Q{0V9h6d_L+-w0cSd?gL6?vTq$4GX+%za=yJW>RQbs z{$rbGB(`?yoDAP`Sky7ojCQ(<#i`>!^uOxN25qJeS5e&j@PAVy$&uZL2%qA;Y`qLId zomil=%{WbO1b$2BJByGz2e#l?j>sO&R}$v}smdeU*>qO196(-o`o!a!Vv*Iqendd1J(KFw8$ps7&YZcOfyo5(I#87 zgoOBVK&cB*Y#C-gG)F}4085SRTdSk?u>jb7QBGWHklk(?o+Wie1}hgvrQF#g7~+W{veGsU%%%slR;sm}$=rXwJZ_zSwim;{HL1s>on ztI!if4^V2>-5mY16u<)NLZ6dQp> z9cwp8j;Bcqbt~968#`-oq_oy-nwAK7P>=KJzrHG*DK}- z@WN`eNwv)?Sle3`nKp=c+52G0y_FMO@@HFzdwVfpM?{RUT8G7dsJM2lgp09@2q=WX!&3DG)HXAuSJL> zlnfJ*(hoSzaO_8sQOfSham46*NVX4rjaB8|%Qnikf4gr ztTFj|!P4;ZjS&&(KYlMGU7Wv+zp?N7;2Wm8>$t)H{HeTn7`tNzHq!zEe9<)_vj6MR z=wt~*7UH>FjXQ<79Al7%aegS$fiSj7wm~}^ftYD@Fw~2;lVTM5HqPE}Sf4c<;~3e$ zJidA9RBOgepHOr2Lq~!bTJpQr;#DP&zz!j`B6Fy#PCsmesdp#Ka}EX=>BK zcQQCxmM)@c&(ZD5V$SJ(3Pf%QN6B17E$JxoK2m#cveY3`U0Cq1qF z(WRhnolOh{lGCUX{3(fqHxZ1~XBJ#vL)O1A1-D~Fj`1wX^jXnPu`iWY3m^8GFm1z;Bf zq=u85I|8epU(JF}ffQFn3ARM89kV$M24eILxSU;HN}ukF9VFjn5C36EHB-m(nR0zE z`lkOlAbz~g9O&9dx@g_Z;=8fTZOA1=I4o-BU6}?v(CD*>;)KsQ=o+yDN(4hT>#K1O z6dRtnKz}oalr`1M4j}Cnq&u}*VfW26K6aoztf6W8s9edrg3F0)Ly&-D{h9}$Csr~R zcvR;hWCDP;BbC207#{}WKr=c%@l<*hBCM<7!0+zLH(o~OiipbsGU$GtO4JcDfn4MS zWDrBg)t=4eS=ROdDpmz&?BIA{r?Ctj)z~F$XZsF?O&|f`;b;>I(gR)tq-OBPuq&%2WvB)Lyito022KY7ZtTYb#nWMGHhR zRKO%mhf);6zs2{l4=2)}$XVbm5l$>&i*D!&RezU@xvPX$5`ZMrJXLgWU`xzR-$udp zdBTNmGbAFx+8dBR0H^^GnLeK?Lbn_$lv(%?V>i<$wxFheMGKPiQGTs-SCM#^KYl2+ zUe0@K9)3A3SBc3#y`|1wFA1@b_4m-S@69A%ta@5~skA2Ba--0?o#`u`Qj8QGo(Bgt z?>bhn!x_eFWfwdu!CT(lvEC`gTq|KmvpF}YU7V7Rk(Esb8=l4hNX2aBLV4Cg-dt4d zpN8ZYThnD%d?0w*Uy};HP+L6!)Pv*ELIi|e+C&qY$qrw5KJctb{c68V3%27gkGR~Q zgdnerGZq5>A5I%oA%->W&%qmn2(N+lukKlE^$lsmB%h(3%i_dPM^42}jb_~TqY3WT z$No>nfjtDrgdja;5-u_*G1NJf_WC5|Q%L4u3m!9^Y4QH2_;hQEdNSFSeF&|z`w2gM z3n@W1gS}ORojh(vbR?T|5#Nnm{Ocy_B_=-UMOPrsMl(-!7g39&MTA_OUVC5gZm3Id>SAJQdgR^dm{;D$+(zw z2j51I74@i$nUi%#x;jlP#z1{S!?6Q0J;bpj1a7J#)b`HBX)$U;Fx!8Wo!IOl^XGG} z%uO$*jsX01^0*mGz<#7GJ1SKb6s9&r=%PQPaX^X}@eT^0f>Y%?oQYCg(r7rgp_Ngbx9V)SUt)5GiB;^am}&q$?{#<70J^wY@{X2jXG85NFY(~DSxfvUqI z%4q~TNYBcG2?wLEq>(=j5+;M@R2dxem>@~4bEMLD5{FZ^cSM$WBNL{b@QDHoKh&R} zN&e%FKYt*f0_OA6_o-8M@lOJa*>CY#%xDXv-<_|A*xwmo^m3N^&2V$pm<|_i9Qo(4 zKva=(o$h@ztEs2ZwH9Q0RHplSde-2Ti*mt&93T%*5^=dlg-~4NfWIg~7OUd9;Msn_ zfmlG_+yIc68D5p%eee?_G_%N@acFdN&vDy8s8Y?L?;UOFoP~$-W}^WH@j)KckjD(! 
[base85-encoded GIT binary patch payload for the preceding PNG omitted]

literal 0
HcmV?d00001

diff --git a/docs/source/_static/img/inductor_provenance/kernel_source_1.png b/docs/source/_static/img/inductor_provenance/kernel_source_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..95e850cc89debae411d0e2e5f1b59ad6faa93b65
GIT binary patch
literal 287510

[base85-encoded GIT binary patch payload for kernel_source_1.png omitted]
zFM}L;a9X`6@CA`njLZ=;dkpmji23%EzvMh!J9e?ng^P zwqKIHQUgnDCV__4-s1neFL%PD{D)-eJUjp?4kpA>|Za7lHV{F61I~sDKDKkIzvjoefRbpo6`5D6-rQaa%&Y2x+<9!_( zMZt#Rr}a+%PR|x!i?+&JZnQl;>=NlloeO5yy_R*&oIzV(G`%WJDVV>?KKE6Et#waNeUGU9==yOvds+@wIdnc??Y%zE`mUh$2l zVZ(eQ@|vkaRbb57QxFD!MXhsk@J}X9)_(63AFa7)+f*pR?4IFtf%6Wn%6Uizl>y|h z@F{8KuXv!FD?5887qNrN_{aOZPJaQTa?gw7H%-Jg?6_R#TASs3(itSsXS{#>cz7iY z5usSix0e+--@Ok;90HsR^BmQpUa&ZypRUrqx5r1bs(e!sEKv#bM|`l<)~|MXHigjr zSmppFlk%;-dnb)W5)GlW=Rohf{b6YUaiX?Ma;})AyML|>Wp$2;S$Ew_<&czj6e|s( z%+QcQxkK6d8SjodQv+w8Ai1VvMX9_~bk$B3^u9cr>^KGHmyIKyNV+bAB5 z0V@Vm(dnp+#i?p&8H$AbH8y7laBas?WtCo&EzI%HFvdxs71@9G7(Yk-u7c`%N|k_9 z!}g>VN7fT~p!yas{@CfCur)pAdbxne>A3Q*Z$MQ6XbEz}I|qULeXx58!PzwA4847( zHS3P0e|*3NGB7NlpXlPRUMsIKN&~HkXRatTmbmGzrW!7W!8;e@Xx5>rSIM~2(}`if z@(^7n823k-b*!6sn+3+0&HO4kPuFnLO&mL|@5%E$#ECIoV`$Fv6!NDs5=1ql{-8X$ zox5KRM)Db{{cP-<G(D8G4 zjkp+lA^*|%VSyCi^WWJafhM}ACT|8UH*?r$DgVUhL`YcdBBbY4z7v>G;kyJI!~3HB zGfw-wQFod~!gj<#S1W#xxBY%1l%k_Rxip4|PFHxZYhMuCzi@6aij;3+=8|F2?yuHm z3RD0{KHCStBFnex^|UYetm`sW4Hxi-PeA_~z@PaTqDJmOpC*ZB$F=Y+gQeRB2)DNU zZl6I?!0-ocDms7J2So2uRCnj2^?cO0kF@;h=0Bc3P4Mbf$Zo#W#`l#~dRgPS@C`&C z>|2qIVZXY^i~yf-qjqp$#k;Jj;T)qzixDm-Yj|$ALCx(fn0<^J1Xz3KAcG~{D-%8W zOfO5jqw9K==e~7?GUBDqveVY5I1V<(?Qo@*sCTfOW-SBRDZzUl&zzS)61x1GygI4A z-z$IhRc+CQLG=8YzgR!7OBBUQh-xZ9Iy^%_#%~J-vwL9pk5Lw|feSB*FFyaEWU5> z@O$aV(`s#kZCxNk=MCt(BmY#pa2<_mxfXxM^xTSKcyYQbeSN~i8;a5n0U-D%byoPhj|#nV4>t9?OBX$sT9ZfWM>yJMngVH$Eq`b=cSk0w zb*NCU3Sg&G_N5P=C8UY-JwO**t^t+~nUT2iTo+5aZUKBz4+~kWEg(2|LtH0fR^>mj zT~&y6Pu$9Fo$j4?+66mwoBDfpht>OUMe=#R>G)3kzL*`oo2NMXQSO0*Q+nGs9sQL= zQHPN9*HZa$?^KNsQ>&D>zg*9q6-7#kpjQ0KaW}^ab)%jwqd|V!Nua#WYYwjU!$TS5 zWY9{vGI&Fa)+&U4E4&y6>1hATwcSV@eL4z`C|rpQ!yTt*(OuJiowD_^kSl-3l{W+7 zS6}8h8aF zh@QY@8Ts`ayD#JS#1}+ujE~oECjsVT6V}CU7+4eLk?%5j$O(0w4YIsKCCaPz-DFt? zbSFp77cWhiL^C#hlae~skZ#L{n(5l^1b|su-t>33ruxLl0#wb0%wEgO%xqbns=j;w z`F($)pn&Zhkzzd-*L_N3nTim3PluNlZ;O?SQKDp2bYf)V1j#~n){0?_Qvt`n$e@1&W;Pw{4RYyc z<{>S$cxF9b0;+{B!%?kAgAR8UF1e`I?+gyMt%7s8L(F z^~A+$;N9n;>3{F`{m%LEoMYm<>9}^O?r%%v6-Ro4y^JS4!SwrGS21zT7`M+guf-Fs zD0k9_&=C+x=H0NXXw&gm{h@4@ciTr&6b9K`dA7>7P(3!)e@31suy?!NPS0HHymBLD zX+%F?`)7FQZji!b>m?(#57n<{G6dVKr7pkS&PRAD1a^8faZ4^-pIO%5o(&ZZtj*5C zn>0G4m&wDZ|2OhYbZ+xIN&b-AO`QAcIEIeKz|Ey#aYx|1`8!IuhG4T- zBulzqO{aN{3ls^R7?F~YERpqxCXbd=mQ*^gmMmV4ET>1l8e-|z2op7tvLksTRrQgy}LzC2~uLP+cc)Ow2C zuXq@Gcb;m_Q{!6ll>5KBJJ@j5-_TXglkw+afGQCX2Cn5%tdhp9NQqGFo=i1k+M7!DY;j+amOZENn=WfUj`9GiY;#%T~k~kEzUKIESWhHX? 
zvy3skPPpy|DO)Ar&8I6m8qM_*)9s^lX;>P_T|@smKCkpBKYY32CSubxyWh!{Rq4CT zQglzP*j9goETE2F&t(p$Z57$W0~TjG{*@~!7i-fpo$mFZWknI5`!g(xG)%!KMx@&~ zoF*;b{b%x?wYMI>UrS2HZC;X=NHu~V0l*}y7CD$SdR(bqPG;t5vo%6WIBk4 zi^J7-1~hE6Eo}R#gSWJaAwm*m^(fXX#L{yT09a&eT(gCMqjV3$w&!FW!mMuL$>6s8 zihSU5iRfAv#zVvl=LW)T5XqXu3{Jdu!gL*3DPXFGLqqzt#oKJYKx^Tjl8z(U?Aw`N zJ!pDulg{zTyD;_J6NnFVTd*NWfsy@s5G5K^k;yFCHb7^xWowazU)De;;!h`ao+;db zBA^q&?$X8POg(H2Hgjr%cp&Q#-gUgF1r~c|{h=sg@|;j0_M*n>b?)hJRv`b$V@dh} zwn>fea@U<|P#E{nOf*{S6jCsC2uK#Yi5Ga*vlICz#rl&vyYF||cxBR|)u#mDlA7GZ zM#+Q{qka`=fY_PZ`$)3Z&tJG+#ZrXP(ze3sf^(kr#0?wuH4V-`?CcI!KCiG#N1a<) z5^YW9binHnAW9p}WkG#}3PB_`ly4VJOldp!`V3Y$+8TtN}=tjoZb4nsnG1G;3{ublpw!q=Q$Y0H-}q&FE7md=9wo+%#F&;4C&S_&2$Rp&4!MkgrY zg)rRzJ(R00ABsbD4WB8@4s)`E#GW6?TQr%OB=GLzZDt_U6)lyTxwP6n6+nw%?ewU^ zIXBD8u{FZkV1j2_3p6TM@#&l-XU$KPl3YK$8-2i|l0sw3ZSRP%lKtadrxe#(hFd*_ zbnIsNjv#*p7k|(gV?wJ@b+_N(zY1#~lsfm&DZjh#f){|HZ4!ZSB+Zx6x^LYioSk=A zdHP7l|NV32UfA!snrJ<0{e$K_n9f)NwrSlFC=Y_Z`KV={Wqb4bR($$VEn2=*dZiFb z6*k!CWqx=LtBHiT_QU>`Qe$0MDue%!_-eu~Zz~l)Y_B+%C?;UHgecWyQv&d|Zz*Bp zFFDXmxjL+IrUJn&72jI_$UdN8f^L6_PXDuo&50J4__*OWuc)SO9bT}rkiO`{GDG(k z(g1~}|L1uR%~1BWS44rkn*|yJ;`MyJ0ehC;DfXg~I|}(|&PPp=4SS3vd^G3Q{jXLQ z0}zT?;qLy5cHe=tnyuc zb7&CkrvF@s=AW!mkOLu-%BPf)A;-{_A>?P|wf2A_UB+L=!fxTm`eqoqZ_rVZ#-!C_lcr6!**deKnSzLuv0Mc6j-$SmTT+z!h7yHVFIB)A;w(wIr8n7*9rP7^wU9e z*Bx@VP>z=O{lRi!;QU??1+4pAj|BdIf#LS#b0YopwXVdrdC0J?W-9 zo4Vc<)@!DR7Z5HUvS|o;f)F{}oB5GW%VwJdy5#5^;;|H7dyhV z$`;jW4!YR~%=;?Ont4-Ya<_!!Rif1U>SQD=Y6mku7=tF3;a_+`01S zw)adh?v5xdL#yAu@Q4w=+*14pZv5=qEwM`k$qNIzvY$&P4Efg&3LE_N3!+oQ6AmhCbDAl6MmG0e}7?yFT+_&od}0{BbD zrh)0ysP)Zx4O$?a3M~70?3y{l46I-;o@54=H3O@-2N0?g0D55ppf{x`QZ)a7|5I0y+( zamY)mXnEQ?_2wJx{w0a@kNlxm-4X02>_+)?ai6mbP$rf|lB$!~=U#_3#j{8b~`CnT&ClL#~6lc6pfh^;6K&MK2()gH|YeKJyuo`qMu%UsIVG!`Q7QN z!Id-NrmfbSY;!WUC{@k@YTTGBvpk-4kq?p_heZM?bN6#fO4aL=-&DD6S+jzV!SL{9 z$YFAL$keU&5&*z8Ru}gwVqCZRZwEeZ)@;^^=tcv8 z0WYv#TlqfDK2WqcIvu^s;`%P5YX{g#>uh=gZyc z!avPYa{h7Z%o89rS^Qsy(kqF<9AR#^*;J;V_~-m&(>gc2J3CvQicb~%EN|zCu{->0 zeL<$FqMQo~RL98R%Qb!XSO5$C{lzH#pFTG%{<1DzZO)e4{hJUhxWoMRro<#lqi2D0 zNdlUmOCMpF`+l(Qb5-qpNb&^3XGj;Fa5}GCK1cU z5JpOR^rGCvBAWX2X9E0G<(mNh>cOw9h* zDyRy5|9>d^>VPQQZC?{mX`~!NTBI9s0F_n|0TGpMkQgZ$S_B4=fuU1CNtF^rI;6WL zg&~ISq1*es?){y+&$)M>^PRsCkePX(SkHRa`UP#6BI-`)8GQ%1ubKCu^#c6OxID#V zyI^fdJ|TKk?u~$ZqmFzyVg0Vdg9j?^ESrk)89~ntc>FC44L1F2ws-UmY)Nf6nm!md zt(%-FHyfez34&*qu;AnsH!nI}J|Ic*IG*QuxvId&p%;AHi~MVW*~+MfdjqUO16}j} z)|{T{4W?Q`e}a$sm-s)Mw0|&^T9~DKt*7kXU3)vfcap6K1w2W=7g2`NULY{iWe|9V z+M#=*cY`Eg<}WI|LMk zP-JJf0_Vl((}u_=uNcAU??7@YFFiSqU)-cx4;9duvA$V(b?SNhC#iGD$$H#4P~}_w zL#L-WaC1-b+_lFlxbAjTl)eVC+8!Lw5o-EX>N4mV9eW(!{u@lQJM6{qluok2?XfJ0?`y+}$p%VWPY z*!g(JD^naUJ{M`iQeKPwHKfEoEO&np<5B4!bK;X%G$J^Z77qlXXR`E2{xMO+K9_v& zyent`esJ@lvZwniNR_i_oYsM|X%A*4W^;6aJdTJc4v^TMLMDFS$U>BT4!R*d8OD^r zvrNtTa^R2%-ok$W=6q?3^Nxr7TV0%#!ruFeRWNPNw)*8VmKr|V5?)fD~peVEkWEb1k zR#X1e(f?6#qdjLHD51M*mLYbbiQ^nlsmRs=(v4S^i-AtPS)w`=T#Vuhkoe=R?h2qj zlg+ja6!^PUli_8!_>=!wqnww|6~?V%$ef^_OzukvEJkYb`lAA^-cLOsfMf@xSi+T{ z@%K4<2Mm0IL|v~E^XhA0T2^-fB<18Kz&@;8C6bdG6(1;|(G(LDI3%C%xRiM!-`u@~ z`19zp(4(g;gw_;kHaqRYhF!?rK$KbLuiUsNv7Ga*Ev?!B9~huiy_l7PT(?hp~3ku)^aaz{vd?)d_D>n(6SDj<^ zjt}|Pibp)A<6gk0av?dwr(-#Ah9q|&P+1O?{u}v9C*Q6CkrnrV>;oU(BI>rkw(A1h zIiLwyZsO6<9*L~TO&EtHwp@TprJ2+ZpbQ_ec!u{Wj2^tLAq(U-Qf}3sX$Ct7v?C7; zsqKjFZw;GZZi!Qu$$)BHC?*7))VE5OzGRm}J|jdg1rHS-((5SJ6vg14vHei*X7D7+ z1*V$jJ+gx&#s{!-;F-v;8rrQoaXeW|;l;lmGntzx$ow6uF>W<)xvSr$&E5);}ZH|Ne`QI0bki@!q1M|MIs! 
z=Pm#}D%mn10y5Dli9_2}?F{k|nPI@zGc2Om2Tla5`fiIHdvg*sBcTM0wi|r1f9I1J zGO!q;kaDAOY;0_6z>~$|=^>O&bqdxK%e{!Ky}1iC3m1TQAhm>5AG-wD_he6}Jgm|^ zL^?$yaov_H6#Kv;b2i!Qa53-gL+H>ASD;yR)1R;P{(WSibV$C~DYcyAoJ!_A?A2Zh z_D0(Yuryt(vjm!PuN^ytl!hGL1y3nH_hra<{mN0FWtE<>7fLm zB=(iceTXttF_&OHhJx8Gcmbi(7{4NI-@rEuMp2#H>vwHF&Y6vZb9Fq5TXPJ&CoZq% z)D220#nd{Cul=V<|96x6!w=dCsG>y#u;a2x3!F}I9(-=s&R4SXoNDCpD2Qs~K-Tjl zd5X*U4ZtM#S8dc?u>(j&2aEUiO?U3T@%ccf`vOM#sQg>J7)(PXmtRg038XOJ zC&FC|fo;Urm>ijOR93}8YB03)XKR{|RH^8-38~;%I8dt7Q->t7Du5*5hvhP9y5B^V z>z?b50O3{Uj)P4sWIjq7{Z{u^jQaLUt9dFpDD>MImkG}j9UyV}?L`tWdbT@SEn)t2 z4Fc-gfdJFl=I5c%wHpb%mRL&`GZW39Lvo`~cJw7WiSDx2Z10O8oymI_^2(QPLUu}J z;A4vPQ*cbGd2Lda0FB#rwF=T(=RgOcLdPUTtndZM?QgA_Fdq$v4Yj=c6Px~JNBx_@ zHSqx(E#%@7-%KlB1roiA(a<`7MyLTaEmy}NKYN<{LZ=brps)t3bq;GG#uI!N)8`2m z&q1y({$5+UHr?BVG_JkFmL6mdB4d+(T=a=PP&66?uCGJXrmXbSNkDyUEw5s&4bJBU zlu}pgj8mmQSqCb~>Vb4hB(CE+R~O&L5EOx`;8T87W5A)3oJ>+tjZAv`G1MF}D}xAi zp3*?U<@W&wTo7i3&o7gCZTPN5JmCMVw;Uoq+Hx{YThl^J!1dN>7nnv*)0un(n1jwL zFk|^4&tG2HAlRcLd0Er^U>!*AMN+#0v9DGVl!^0KizFhC?KEa*THRN72b%?3@{WWQ zEMD%m{}nU;;}rQXd;TA%Dkm#+LZ#{E%&0>zJhTq2Q)+eD^G^V z)e!qNC#wh-$X96`GBRa+(5~Lv&in15>l|*umWqC;=ny!V7(mw@6z`$DaKkSkXTqTG zE%|N_Na!IhPm~5(ppdH)8T8XtsL$e~ii2RSuY0eWHCBKz-}7ZY{;>+Ijp?N}fDT&j z6N=BfZ-oec13lkP$_C3wHvf4L!&mmeYtHq+2t&aBPG-mki`V@Y%J*s4UEXdWxw$Al zZ0MU~IY*5k*J)`p{W~F-SO$9decP*iMpSInm3x!+)rDna(qE?hqZ4WnHXy@dz#=o3KEG_>?vhj1{E+#9&rbPa20VjH*z zemta_Aj*AyBWc|u3mieqsB;fR8mzGoiw;HnKk&HiVq zJ7p3SZ^~U(Pcu$r(PtaxjktHcksJZiI(tZmFy@+~47eR;-*+`056wi2$brwF<}x7G zkoZix@VRX#`MP(hG{~>kLJ%G%yCy)w$OTfT5xK+lET=%c!5z5b2&eHH%1*vH&%mS^ z;L|}akQ==_V!`YJ%;znFpfF@NViv3VpvU;niTWRh<-gh5s2_g!K-|2aa0KMZVktF?a&;feRY205A(DPK5iMGY|S9rhP(JqCIjBb;L zb&ddepqyoCGP|0S*DfG?DF=>Yz4jfm71$mAl2*>dQn0pV?Z%ylI;Y?N6`J2+lyX6` zRk3{Vjwf#c$a3#FNs}=jPy9YfVOs_AvIFBFZa+`X&MSX8=WhmlZ`Ufqdw`*Oy+5N5N!hRtE{iWF|NN#0mrl>8MmH4a_8=N;j+_>d61B;Z5Zx@ z984 z(n9cu#5UDe_l1uEg{PMVCPz#`{B~ei1j5DOGE56LH;k?3R|mIWu{H^W>God(oQD?X=Y!7wsa0%Ao@zROS@B zS3LyjZpT*wq4bO~fVz&QXRkHbuAw#X5UM9t^Q`kOKl9mvRiR0pv~90Hp?;1XP+ade9p#e22aPQ#Dc#8g@^#b)X^@QQ$Z(S{ka(B$=9hr<3OR4jxWKe4 zUDfBT^U650*>-e+!ers0*5(3MJ5Evm-DiO>7gLKex=1WLE; zo&}gNNQ08cH?J=HNc2T?w%-Ie55E-(ajGRrX(_d@2OR+l_H4~80m1^=xU4`1-*|Hy z6bZiBhuHheG?(uBka=8>?5vv3s?M($v26+JAtxI@e@p4e7Y8VPNv+qH3=}_a#+w-3 za)^)-1b=9eoY#}sC5a<$wofC+i7SOP-7vFZsnUFOtFbrb`V60mJ@s2qzG*jjeK83< zkT6&^joD<|!-YUHz7zGj`G9^WH)bklDW% zL%g{J7G~EPK9pvutA*H98NIYE#>jl)e{lX88#BHG#X{#u=0xdKJ6wvIqH{b?B(32j ze@IiF%^6xoHcw*Psk88_Bk2a4Vpi0pQ>C@ENY?d<-Qp56kMF>ze%aV;kNZ#ryW+L` zEyZlVcHSFEr++(M7%F0RNH}(<@O)%S=_qy>7XE)T@_!d({*hutQ*0zADqbYw1-_AN zOf7Xa5N4qP)+=xF8wm7v;0`)ZE#p8&y7Qc_W`xSq8=`|*`OOZ41Zm3!`;7ORH^_~; z>lgO1J)GC{Ww%k>Y}h+o*HVg(QImlZtUc;4sTqz8Af=mARs;Tb4iFE#VSp1m#_x66g zqAcTj9rKGv%12lArY4F5t2Lv@x~ry)hTQiB(x2Q4zljUWEmBq2v(&7iV5X^9E|EBG z>EB?&lw(UyCV_zfJ-(S?&UINhmJ7JIY0PW($V$ z`;NKe=JG@}n2y0#S&9FxgWZg&D^Qr(Qj*yWw-l-@P)hcC*yZXQqfy2$Ttnm{OwvqI z+XfR;Y)@yVRIF>x0}z>oKneB%^%oV0IYl)?awa4etu-d~h2IJz z&$op`X$vJsO;elpHnFrgd#4C^f%HWZoh|WnHP^I?;}U+dZn^utDdFpAW+}`@wPiJR zjBgP;e|g5=UWU%(f*7P%Rz@IHz3!m8h_yeR08WPSO7G|`-uK$iL{9Lwr%mOa7r2tJ zv497DkNoa_LyVOeKla6=kdI@*7{A&puw)s(W5uv7*u!`iDR_nDCQMr|nGd-hqB{mG z&tnOx1=x_4n=reBt&Jv;$8+lM9OQmZz+_CZXYhS2-6sULbK|M2IFTK7Q-I6HUznD_ ziL8qu34Ney!aND(H}}&660`Xb&B6&UHnI8{NIrMM5huYcA^=A=`dBK#SuO=B!4Fi& zWp@iOw*(?keNJbQcU;CrMF4KWu`uA23Fro z!J{Nd=~cYnZ0rXFlnGz6#UQ~2(rb_52^sV4J0)#5n2Dq#Avay#E9RrT-9*qZmvk%>^U&sWeuxhZn+&P4!!KPXGB^0vBC z>2rxGip4SMz0xrO`K9?RDnX@Y&o#sv#=#m ztN*2SDc|x1rq=gn<+V1PpmeTp+ihg%DNwpCnZ;dO-$6;W5Qx^E9 zgI*Nw&v41cyLLuT=q7H91!0aAl>#;rl-+%V%87-U*T=}s&calYxn{?{3p&Vm{tF1? 
zy+OadE~wyosGez&*`dQ;020USc&{#msaUczOPi04zky}{S5k0<0ElF?5Q%Zuo!gUI zyFiv&b54)J%bb*aOE6yH^68WU247&8LK0Ds@~_pMe}9$;Qc!F(T?GfpKFJh?H+rIl zC%>|f`W)hRvO%FNyxBi$>3JQ3cWx2aDIYnJ-Z7z009Ss(!i^BXAWw&j;GPIxp=r!q zTV!*8z{E=1gnOFLu^vnU`EC+N#mRWZQ6e&#g!PSw9P6d1Sx%YcO7@2x z4?5ZfYT)Mt4pdd%mfXX(2y|<1<~i)ZmU3qVf-KjK4XG*NE0|>xW{VmzyVLyw3U5CH zCCODY^7x+MqCJko2aKtD7i+v+n^NJU{nSju*Y{QT_`HkkcXi!!w-hNyL{2l>g09jl zJ+JE33yqeLYHe>MDea%Q_yAEffR{NIN?^iQ+FR)Jo1%Az1Bh#XeTw=8LvA54yzyG1j{iN@xaN z?W}&XI4(h!Moi7u!3v9+N+x_N@q+9}q!vYOe$n?LYe{qK0vdIu-YmW zgJ{uGH@TL|_gYnbOZ%VqHvEe89&-9GN~~H(gvwSsUpOJrlVE>kQ5my&UEEiZlKb$rk9AGL5eZh)zHb0S~s-3ca>u1cl`H? zJYwM+Sh}sNJnQw@Yez54{ijw+{%#t?at;|N{4($j_7ky$m4 zXD?CNEd@-?miVok1vZCkQHq=Y^y__FX`z{?I*pW4xh;Ap%A>M54Bjynz|4?R4LjZlkt=dKpGrRcrb=0VUvTC|?H51$3`10}M#n2s{fO;LJe68e z&0p!K7t<~Yg-bJ-%FPrt$XG^ionEc5J%6<2@`?!x$$GRL%IC^?io)xT#^4GymAAX3P{{4AFhO*L zu#dNL1ep=3%>LxYPT(GCVsk*vz59#VQn^N;9xyuOl+?afH8jfLV%tK~M1NHdzJG?T zOg}1BHnMoGITim=o4#%@oy}7o))DZ(S^(yOw5%E%znImcT{X*qmWa}D!#svWYb5;= zNtOmNzKBIJoK`?wkz>7>1mA#D7*P~Zq>T6tE|!nqyf|qdZKqCF5Ht*vt@PfQ$Hca6 z4Z=D`$<-VN6CFoO?-Fa>L!b&>b&D4Bq<{Dm$a9+w5fDz7sV!aCMyd&7z#JYzHWkt0 zvK*^MH80m++Kg$7HKj3M$)q#8oNrI01(U(TgjV?HckURmZ)t;4?Wd1Dl80td2|y(^{ua zwCX0+zwq6kqPGnkdh*CpuCixCZ;6Qc!T9%D-7g-41YV#ll43e!KY)APL7u-joGMa# zCHE($fZf00PToUVHqTsNn#Dkk%x4wKGJZzHf#E>1C0~1A-{~`J+9%=lXfVCP4G$Kr z%ex%Ze%D{&YjR;_!!K~I&S6?(KIjeI6>7K*coF4oH0~dcjP%A+wzo?!$$XSpeU@C6 ze84v0m=phi4s=2O=fWxYFOR9uiZt-teHY>?o-(o#6_TH7y)dhQcuV8TMdf>594oA5SYQ{C9qyJP}-WZVb#pqdLje zn3r#!B$!C~i%tZ=??+R87Pf=xFw;BwtEd8NijWuknZH_ob@x zgb>;hL(k}@jiQS7uNpG`Q?fufi6T5J3L+KxrzWg}^F6XHh zknJEq;PCO3qG@1Q5AvE=UU3O0ZF|s4sH+dEDwZC$pRT$QKLDHh+h4Q3z{{GbVAq(( zAfPMnYaAGG)`*(YD`-Gu_u+DPMD?lRTge3>WXJ6A1}DmC`c`6*6>z z=<@~Yk)GACd0%M6YZIyMRg?$fmBtOJr{unL+&YOU6zjmy}qgP*)O?wRBaJX4dClw#_8-bNvzgG7B(fy$UM*{8ayLaTsM^`GvpK6l^ZaPq+!5sdY!f2o0wHb6=L-~xnB9uNk`c1i=(3~#=0o>X7aohESD5DZkN1q!u z+RylGWiVj%O7!f0z*R zMOa9(I0>FpE%|*GOoS=*>!D>c_>o$;hDuht3>a*%6 z7l6NhI?tDQWD|NhBPPj2^x__9G(Cw}EnUJ{1K@4oo2eg=&Eh>e{06&~;|VZ77!9{; zBU=g~Oaq9k;z4G8)Zty5h3SiA7!6~h56h=La%9e?*_e%nzygv4A`}u-{Ir@1KJu?# zb7QmZL#53Ewoi~Sy>IWuohu}D+Z4%W*sw3gf}Gw%M3+38991>%{c@ zW{wbdBtu!#eRQ1Mm@}QH!2me|gy8OWf1?-Z(d~rHezht35FHo_NyzN+mH&m88gtBR zUoL-**>5sWW9)?#r!PhwgQJ)-Yi*bnKXcQgv-Ijx1Z8HM7V(T?Ke?KWBvE^o^JLHD z{iX@lw2{>6YjZ+><}t)=8N)ZJ1aCI3%vE7_gcGBXQ)9|Q3TxlrTcsX7kaB1|Vk}fV zeAhyg*Rl^9ZO`Av=r@{}4&XnC3b|YTofZY%IjYN0fVQ*~_Z==W62XJkM=+|d>H3s; zKX=yq2m~*jH6_anTfM~7H96OlF!bGo(HB;I*^5B>nrn3od zAb(5pwfraedE%S`d!i%Vj1{;W5OBY>=$%^v!up8;+yIeoW_+HJtp@6xxsDaS9d&hvh*_G3Qc4MrF4bw zLAQ|68#F4cc`bJzB0MC-zJ@!*;5EbFD@WTK9QMUJyx%B{xj#xSFcV%_v^@Rj#dPl{ zOGjolMj_)%_3(q4NY|=cZnvVL8GuoW%r&CJMGg88a+D)FGX+nY&LA~m)8V0X9L;^v z9`9cLV*_I$a9_!<*;oT6u!jdtet;4J14;jk(dF)iUCWzXMM`ku6618tc7smlV{lY& za{cUOC-Z!77SEFGH~+%Srgy507L+c{EsJPSd`Emu3LsSsm~ut~Yk_a?^9|eT0yWwjp?k)DA0;*um{cFo8xx=ycy7#^P`%K+Kl1&qPPh^|aW4D% z7Zg%M2Cd?6P*$u!_p(Pxu<6Sd)YAQ~<(Ir@NW@d>ug~;7yafQH#zk^(o|U}(WE1Mw ze35Uh7Zq#3E!=I@%(Fq!s>2Axx&LhU0{)#Dx{W7?pzocOr9CIa*OA4))<7$Ft<4*B zK>Sq92~0IgddM+9`Y_?OEg@H@_w8G=Rc|rz^n-IWdfxfRS;B_5XkBooy2i!TzNc%U zZB>@$N1>3&;^=Z?L8=I*NR!flse{}QV?lmCR^la&bym6V{ztd@$TUs;G^4!o=T*Ta z;nqiRsq?82KO=RzkK0;WC{>-=f~vd>{c%i+Ck=+^-On!rxi0d;blB0lz5RZKUAN{L zTy>9bZkmXa;N6U}ocTXLz7O5(USe--lIPZ!c;=H7ZbXd-C+v##H74e7D2LbP?Q-gv z2WB~I!5=*R808xLs3wR-|J`ZM7Js?ybRe69O)s-sxcE6maR|Qh_YyZkC@$Q20E>qM z+b+G2!w+LgMDjkeFkwHC$fv7ihK;*bk-@=KX2)!&2C6VbhJFy}y84)@J#)yE)t$E9t(m zT(vuyiFSP|7(>D9O4F2_pz)bPGv)6hcI%ozu@yFag#0v+UiH2h6Hsns80rb8Yf`$V zO`QFX2qTM(UsM?VrPC`^uV6(S37#E_0_Cbx^N$r@df)@=)t#%9VYC6*q<7QeLJc@e 
z%f)#*TC47WF>7DHxVv#Ob5Utmf{C8L0)-{&s;*=caxSyl$9XB~RSJ#hl`#9!XZ$pZ z?z`{BZC@zaf3GEW1}rPO_Vm0z)L~sOOMzgau?p6GLM;+}QFG&P+@o=*kuW>Uvt>@t zqm*x<>WXr>WNwRWV9Nzovc+koEr#e)judcS2zrBi8=`DB2CmmM^vdIBlQ_s@8?2>@ zXQGiia{}H65AKYUzr@a9Ajl|;v(o3|sAXgfj~2)i zcEy%54k%o&Qo`9C&kI7KR!J{_`MTd?+d5bj>9ya%s`zq>j>WTfheWAhDzqw@>-+0m z7GR2hZ&4T${e+-*mwQId6KywD>BI{*8fr5IVZt_Etpp*ua&qv{^q(+uI$I;&I@HYR z(fS01tY|};vhfqeLnK4}0$SCcShnHA{moGN>8Z0JB&H`^Q@27N%g!ek`4{z`vEN|w zv4Af*0f{5$sHBj!Vioa(R@nLz8`8qh%JjFYF*rrnt|MT4CEcwr*pD*jX2PehA!R*4 z16U~(bWbt6@8ZUn4zR|D@W`M z6P)`O04|HMI=q+Z4K6ZbS)8)M$g~=Svlvjx!Dr|euVAiYyz#*Xrrx7zuyqpxqZU@_ z;uYW-#fFq+)gvatub?_&>{D~FqmpOIo+9PzhOZvf8a!-@&KO!|bWmOL1dfB|xNEqZ z&2~`Zw`+m|?4E!u$sjfFEHs*AnDrxiN^n|7?kEPWeSX^|N~BpB;n zM0_MOV&HS_p}luMx;r6%|IFLhQI*FbpS8n%%C?*1NWVDK_f1*qZOS!Wx8|_TE5Oxi9)4+3fYtTVZ0jts317ypaV;Ka z(@l%5TK~N3UXYf@mEz&B8%}oY2Jm;Uk*NfGfO+26FHEr>w#_kmdBw6lOktNhxv3G;2B6XUB$=RnCtV5(nY^b}A?DUTJVWFQjv#ZL@W=pI?l-L>8x_+YRe6X5n0#&KnZ8G1U;b-2!FTBL_3}1 ztqoX$IDN$6^1g!7=fOt3Q3_mFYGs(`bVfnTbz-^9n{YM8HX@lGtbub<8RFG{U4AC0rxu`i~~ zUui2?xMzE9I(jM~c9Je*S1T!V_d4TX%~CVl_JIvYE0Vo6EXayLEpLv_kB| zdZEo_G1yo&Y#y{B9Djnrxg_qnUEqZrxO7R5qB77g@vhCADKR;4%)lona|3YC>-UkC zKXATqjO_yClzHHON&9tU+GAxQGKTcpOkO6=LlvZBy{}R)fgtK8QmC#yJPavzPMZbo znU1B^M}CAeg@@>cHuSR}*qgFrQ)f!jeG2Ya7Jb4wPNc6y5McX0mCSHyqcsFSL{1bT zlz|u6i)IRM%BOkA%eA;RB)hnFJ6d-kJ3-r+&hSYFCgAE=X&vEYK5|-T5h25RHrBr8ZNUi~n$Z&fN(wUSN4el&m z*DiG*d%+p0TXpHclFV}=-_F5u)Vk<-S8!Iji+9b{N9^6 zEjm2d%DUqN&&7<`P(g938a_~q9TD{rv*k7Wsl9|L(3kzp-(AGQ1%a?CtGSYq(}BCh zH+GcPn$VM?G~Ao6h`kpeLf||E?62#`x{Id zG}Yf^1{D(`tjHhcFogR5s~qxAX!(Er@`3+cKSeF#2_5zINeSEcdwr9~cze5;=jQeR zk(rPP&aKP$nwgI}o#!Re#<%;`)(2l$^Fh#%#`84r9T;K_aCcr|D?+ytOTKRjC>L*< zw5?-EJ2njj!_?Rvh4uo&u3xA{VToP<>c6X?J)NfKNTS6%s@zsPL$W1y@NC z>kba7_p&jA5C9tj+qRB+7ny3^Ph|VGU3%xBDpqSAv*ZL)!q--P*vHp#S_qpNK-fa% zV7FZmVUMpNcn#c4epX^wQo?dYy5WwqtSrp*RAG*26$mOgTY5oW+`Zm>9)ovo=dZ|t zV33O7;Pyt?MBs0hU&Ji}xQ@XC#c;*TA!6M8aWz@>NNOB`Z!2P1CEbF~|3L*5-?>J8saX3&k{9HD>O~NZ%g! 
z%-;Z1y)MsJC0R7?G1E)_7E*Yr#asRN8YvI&fU6qfoOrp`$UhdGp&-SNeWQ%%r)`(# zF_)Y`YPn%Z^YcCg4i6#!@net~mf-npN?Jnve%eGhD3zUb_4i3!Xu5V5Z7LKq4icL8 zd#TccjRnVl5hBixU$l(-OAFw?EeshdL%?qq+tRx{#j@4y2e_*++T}FnlTSxoF4g^# z)e542DD}_CI#FicR zZyIul|4$q6&mQ&hf!GWLsr0BCa)yEu&~lc@Q;ue%gK^nW{22qf>la96k6#6ZQx|nf zEt@uzI$UKTK+-MVfv6t00pj-CQNd{m=&YB4zI&Oy(8gvnMWNyjx!~CHl-b8$a)!gi z_=kkH`Kx>!|LySoYsByo1=A`(A{%^i^Bi5cp>AK)Q$1&`5@5Y89%6_86IbZ2T>#md zWy#T61#Qe;m>j9S`&wC6qNtUIQCzPF9-tLEz-O`z`8Ecif@9~d@b*Za%7<8%s{M>% zK5`=i<<7+qCfy=W&;9)m^&0}(H(mPclJMgO+mGHN=)8O_zuK=RyE+HqXX##`BWQJO z3e@>S%q*bd(+_JRT>$L18!fW311Y)-M3k@=Sn2wo1LVJq$bD#OX&_fJ{@%9u_igjX zp#a4ljWa>~$NK);&;Os}w~q!?B)Q7;cPngvjoSZv?T%st!^*J_-y3sW14Zuj%R(TK zm4biO008|y^ErW<<3b$ba52jT!pznR-zTaBz_c}hMz)XSRCNF`aO=jb6vN0dFxLe; zXx)39Sfpcep+*tMe0DVPb$~e_kl_uCvOj{cb7B!3#7zTKYk6CPFQ8W5fMur%=pZdO z^KS*3bc%F7uoD6U^MUl-@BL=k!pV;KxZCvf=jz|RVcN1iF?*(CU)q-In#a7gQqY$Y zkWHOabnPsTKBUQd??j(aY|Nm3bd7-dEN-Y4k@nTEiREqvpiJz8>dY$e4*yM@vl_I*(;?dbFjSUJ?#u-OEcrUnk!?qp ze-Ry`e84;9#%(-tx?3_1v;)IA_5jtU4Df1l1Q#J>3#0o=f!_KAC`5yJU6HqZ@Bv;5 zw#*m-%qEQ50sw-Y0ZiUqzX)>M{zEJ9LV$g{27w1B2FhH)u`~wozeBuj00g-T9iH&R zEZGyQy~vbf_M^@>CfnD}{lT9@{0C{kqNN_wb?n?3ZEvW!NUPfe0W#Ovm=6LWq$QOm z%%FE-uX?Wx;2y@R@Ie0*h}Nt=FOzaewJ;7k+jjWExDO|FXcWL0(=J7$OQYx1tDY<1 z5U2VM;$`H5Hb`2+o}h*Ku}nv<1k!AMuk@LCrYtB|#@Ff*M*~duXf7yv7HDU5ivufo z3A47SWRnpvik>CFCe^|USi^ckXjB2!lGb+oP23^CkbTX&79cbauH!Dy-Ea&-`P%d* zkOjo(yIGYqU9&FDhWHmldHt2NpMH->oI#JC2!Gdycgnvdj&rHlV0Zf^|E9`2K{Qac*A23>LfD7 zsbAp32^i>HzdLtZ0oJKP|6*p4!qDdw)K5G5%v1|)^a^)6sHj%v;T6n>j>gWnQ)3rT z8PBdZe`4uDB-77Tzi?qJdX%;cZ!FAvps&gTZn2||W{K`imq#yaw}JfC_na0$75VUy1-l4gqm_}Fq3w=)G6mBb!^ zGRtH$S&LZbet+8RWtZ<(&u09j2yC|4wQpaU8WLERH8;k=E^~u&qY3~D&KRib(KmbK zDckGHNnI1f^cx}}LKtiUIjwk2=W%aVlOq6397_VR*$&hu0;PJdJ+Ps3X`JjE^ldKW zAP_{z3X+N*kot3IXVf6Pwi2b69U3TrMTH2| zmRKq!`wgJc67|w6WD3;zY+Ud9EOD;+)q{#mII$2p(p&87*4PENC)tZ3DO;70Ae(xp zdz^@f*Sh!eIA{Q{VHd6+Z3Ty)biYD7F8o=M?)Ce+q?U{k?ufu4qIk&v{CR>`T9s(^ z?*-QGBdq&6AIYzo*{?07&}yU4*N$fGjaz@6+|)idYac=Ha}XC~JX}54XkKg`W~J z0C1WsEE$m8O|;O82fL2R<76jg>Ks>CqZdyvs-xEMUt8zBwmJm&2FjN|ILCcwE^S9X zRr1c$G{5qT>0G@yVv(?>IKce3hH=fKLu&0R@zcbKV@_|0Plq^A-HeM>AMypmq79SO zvsk~A{`%Mm^TC-;aGQycN4pi4?p)LeJy~7p#t5K;4O8fsOmqW`ab2nJu5s$t!|~la zgKf#@aSifWFuPKgbwV)Jlnh!!Cp;YDkIs9pm@v1$GCT3=3H*L794rGw&pV4*dbtZ@ zbaQ&#)+6Lp1pF0GM=T!*eRb_m#6~%%2iXO>)Lr75IX62KskZh~Nu*)KRGf)s(6f&9-M%5pX@>JH;2$Ucm<%)^SxSLI<=j>J-SF-;b9kPfvUy}XS-qTxUDEK_Z2qnue=HvzGb8~C?t#2Y)uvID#PvY@1VxpqghYJ z2Rf6LLuY(>vhhh5<)N#2+LKvEFb{~noxgax#E*<;!73NKHXF0FT@4NZkEcO91ODcs%B~?` zo3ut#?3da`s6GJGr&zpp?#a9N3td$sRN`&!JKxQ@)zHt<%I#7 zCuF{8h%_QmTt7+V6-MISlm)T0gi6*HAWcD;-B!9!`JmLifA0v4FLU?-!tnwe_3_UOhE_ z8+!=u@j{{*n6 z0G%XyIhV4eg`5mwNlB28Jd{U)`ysEaQHY?b8l0{(GvO1w|EdWXSou6qA0Kf3-SwVx zP(TE5l2)blU)~l)A#KnhVqkh&h?=hk-gEw`uB4C%;3VZnuG_szj$@Ku+03FjiOnZJ zSRXoy(tfC`Ogzm4x^0=#6w@Jd1U^lUpoY_SbO?BNsRkpSm(DeO`~w#z{Y9%2r?i*Nk=fMVgPvTWuzU3D3P{4ld*c&2V$DR|Hub+8aVcOKaH>&o} zwM{!1n#7}om}P!8kL{#!C~A81^LSH@*YH-|^^#cGEaOY2C-Za`9lHCg&NJ)?6}>8U z0p*DmR^J~W#_Quof;_mak;uNmMXSfK4Y#A4n}9Uujs4= z$Q~PsCd=}-bjlp9^tu!TKa8YINrJtW&!DU@_9`mNnOY=9Y%_h+j!PDme`;#Pl z$Zm}RB-S!Oi+|w1Kanf@dLRiffk()u$J~~4yS#TMk2!&BMf|n^6)V@|rSJi|e7?=YY`*$zMW*Bl9^M_O&cvl-eQO~R>SJ>xXZG=sksYKcgYaC|83!r(VqGy| z0JseCEYELKuT!u^Qng5a#eAnWvkkUTCQSY~4sf1cO3XeM5M^n;rlHLc54)fzjDv{$ zO;ppA%n-AVN=)J1FxCW)C zGfUtK9oCvo=D8q8`kacbDV$p~(=`8>eb_ZW9U|DaZr;;>a|9Yaw78AnmXM6d=0y2bjy=w-UB(0utDdT(Iz6 zONU8t*ES=H6;IX44te20`ZLl7Wq@oZ1v9An7?Nn;L|+lXvIg4!D}d@;Y*UF(9x3z+ zd8CjMuy^l1BW?Zmj7yS`-n1Y;MBo}x+lHBH37p$9INglZtL>?()DF#?-~7RRcCvcI zU%*thmX_(Boi3m>$Db;~yaB0r^>cuVn?vqRoTYWsxtT?7o0oPgWUq2wdJ4DVlDJv6 
zR!YbBxHDh@-C{IHht393?GdWJQ)1%%;u)yXd(>ee6B$xbklvfp3k$dJ;OhMQu zen+us#*LS6Te6AJ=~-VY8Pv6iKfXKYx23h6VHWe8^TA6ntIpU@>1@y2<}mH8bB2|M z{H^f&pF#>n);AN$Mk=q4Kbqkv7jYC?YIvWkOdBplQAQZLK_ztr5vW7Rss<%B_Vr$c zSw~k7qC8@blE@7ej#a1uH!a6C6CP_r4GKeKfph?sj^IG^rHAbqFXU}sdavc-dB zGYfCJTy9kEyXy{gL!7Y{-cNbIM(%_ZI@BRfOMm8FU&$@#){(beJT(zg8*(G#Tu^(66Bn<7F1Bv?F!ZH;!h|0y*DvYEBP(R}wa| zbACr$cVF*lXF+m}8T4#&U-?;A6ZJhl>V!sb8+6sjKWYkE$ccZn=(l(l5PjN=wdzG; zO~(!94TeznnpLav+eK^_x>rUDIqTdAHj{*+PxLk{7{lzNt8C`l%$}62Nbw<)oL@C* zG+B)-r;eND`+7<&l1cIJ4c`*-HYEvm+m|LtdV^=uUai< z7PG0F7&h^&n{QQmn{h9>)tZCbAkR(VqXvA!N3nYMd%p9`=k_~E` zvboAi&aHzRa&v{g>?;=DS%5A)x#DXL=xumb44eHdXn_cwlRzjxrXw4x6ds};5HO`_ z!uxuAb!5$`NePiLG8}N)@)_+62-rB6K3%)1yfF#-{ax*|{tg?(TU^cduD6ERhhTuwJLM8OZ}UamSWpKgIXnwFagN#_w}*QosVOZ0Wg2 z^yBMVefA0VJnbL1bqfOyjxO1H6@6%!T8b{J)YtDtm&bDDwnlO-TaHv|pV=z)I-kE> z1;38q#%RyY+P~h3ET*XWoKzG0YU(?i28P*rzv{EcuHpmNcoU^N_7_suy0Oo4vw!{( zaJ%MlW1ohR`}1*iF^8R@OoOiDT!6j?|&Q``+61+hwzCW&H$3D{yxm!EM0nf@sz=MmG(n>#G;$F zR+Ea%!UHUl;{~A;b*O8P-&j+N8Rz#KC4W#^_Q8^? z)wDSYNM~xOA?^T(#rKsB(dA3!Xoq#$wM`d=Et)xo7VRJ^{o1OB-4AB>NCl5EH$8Jn z4tNjRO5f(Kh1|TdV}(fPQx}#Yw!iERN?f0}Vy>P%Up_U8ub)sj^!crv=ah#b%zVwQk}$=Y4JnH-TBG0-8Jmbt#hdpJ$2)i~OvzIsfle)7B&P>OG?vyprXZWo!?0uC}o8$2n z^Lak59cay!<=$TpS3kVP-&}b(UCsIzz#zi(44Ay4r#-q zdFqlqWYGBqJa2U?GherqsT2MgiS2um-$6E%v(AQ z)pE0-!ivFy-8y+uRpGswSF_JBrZQH^g(RZf#1p?8mG`U_frzVIcs-|r0q$i^t(?et z8t+FJ0>3+p!Zv$8reAN?@i}m>Q304#X{p5}@G4T8W63@W@GT~J!dK&N?1xhByljbu zc`bSPrhnI5t>XgJ*9qF6neF;fEJrB$ALqN~29HGCAZI+up}L)%#ZOz#`kxJZGxYTD zwQMr6Cg|uC?wbyq*LUSyJ0-l!Lxtnb-5_7iaOrwU#kvuCWz@W81b@&R{tS<1lkA-J zr^!afi|B2hnqH=Xu4XJ0L+{JIE)SlpKIl2*%rdBo6ahetR~Osn05z zw`n@S(;XK9#G`8gkUj?0;e)w=sPcGfSZ{{)yt z|7`=-MIsOk+qsNRrb;5;k;8UDP8f-9nx0SYp6#}hUVW*~KS{Zu8Mc+7@cbQ|^w{>2 zb8hY}&-V%FCEWJ-QxTrko^nn~wR7k9Xaz!Ft&<9{M10d}UVcHqoP=ydRNThh4qilx z@vZKmXEWJGN4QIWL1J|<#Ud-2BDzTqtVO*F|2VFPk{K>L_Wt7Yl>hCG?gVA$=o4A4 zAohpFF_1^&oYl`uiJCm&qe6Ef1p! 
zyIuqp+$(zPd6+|DCzzyK+JBAt_yJy@JjJqxVKw^j0YtY3&;u%=ygsx!>7D#4kkPv}+UGjNV*=bbyo~U6Xj^8Ykg~#u zt+sJfvrJN2W4O%&faT(3RmhRVWdzF!Cg^AsSD;Gn&P47)0w1@GO?4Z}|&D=K}{53bBReC+wkd2YY zo-tMMCfwLgChm^>tf=Vcu}4fl-^Ckz8qADRT5$Ddple8NYB&~tb4t!OE8WcPI1YIT z8*r4g5Bk&2>+bty3JVoCoB;o@7#Os9JCQ2LEmS)&Q%jRQRhY4PXV6^;>508FcK4-| z@wd)@-^B*2X*k5bm1&WZ!^Xa1S!&3`9s6^MrMgNn%=M5hkHO1`!hxssM%6bX$=GQ` zNNc{9TtUj@HWlq@NbKwM?B#N<>t3rn;JBwnwF7 z>0q0zJ@t*=BmfdW^RT?5(tEN?VGMR^+=X)bvwh#1m0$3t9`+1qP$AqB{QRjI7!A^; z!&GyY)mDi2F+-<|w(l$i)lwV7E2M^cyQ#T70!b}Sj3WBwusZ0n1~^U|@op#*K+~1> z;bBHEdCmQh!8VULwl;d0(~Gw}L>w~HE_Ou7ENtnfew@1Yk?=fKT+*H5F39#`3*qi@ zZMZi+w*9qz)y#KR@e5Xf#M<7kQztx_;ZGek?KgOYra7@+!e}5NU*Uh2;8NxkE+>l!X5B{0RQk7jg)Qn83TUrAL>!$k=S`q1nBJbQ`{n% zC;1(PkT;NJD6-60q1w44m`gT+!0tQYy9G&~BW63-=;WXfA3T}gG;eMT-w$AM6%Ljb zCw*lJ%6*pDH8PEHH(7wMM*e~CA)-X#uJ@!|Du!JZN0ZZDM9i zn&IE1hreQqSv*$b<<@WMt?`Q6`w%ht_)@C~qx75Y>T3&$RqaomhiY{Sdgo4~T)YNt zBQj|8y7pkwW;v_Edn`2a*&Y04H@)S|9%pp>n|wZk&W%By`x*j@R^uh?COe(nG7DTgyP%+HbIjvRXHnjD zW|pv|)XucB32Cu9QO8mPowf*(JlH8kmmbY(!>Rbf^-AfQwx3ll(G2lU`=(+Z{F!*y z4^&#r%~2o8b653swk03E?iW(xAljNEs;*C5zMn-j`(WKF=#Be{antmKVN_3v$A^Wy zs~7F>*QYXS7~i_h$GP2rE)l9G<5q-sC3HFT%|O5-^e0OS@#zSAd{_TGBB8RgVDpaY zgal`_=&k}Vmsm*A)tldZ?Bxcb#B5-&VAiXaeUT3YxNQ{P+QV%q!c0VJBS`8c zja>uz(9xR;P)DL*4Y>au=*x!$0v{>;_`KMHp&wcsje$W%JKMt6KJNb#*O#8K2;;`A(&i^CI$=pY|BA8=veQm(6s2#-V zMiO7u#M#F81$Y1c?pG6hS(u3=USE4&ol2_!1fe88F5{HO6B5e02r9XoBvIV2;upXS zCCz%&v65wcf786CFmA!oOvFZHv_PDMV1qY!T$FVwScj8i#QTn&*sJ0j+9j>w3@UVl|S&qi+s5%hsGs~ z;GG5W_F7e}g~d_VwmxMx;SBy0NQF4El*d~)P}G%yO>UYFX*KS>v^vVrvH%EJJO%qi zTBb@3;oq5n%j25OKrGBV;NF)c<}rk5znFMZ$!!F@bH@ZCmhsf}%?8jgo5NmzNEfEA z<)a`5NSrIq!rjIE3s$m3d9`p~&8;w__GVcDNK-^sIN?^uxsiq0-pt`^&X0IUlKB;R z3?=raM3EekNvs9uw4znNeI27})J;vPEypN`P~X{gtSc+JSm$e1+QGN2R3{-1-h9v~uczo)zdiWOXflPcTSk7q{AJ4a zX^mzffBPFelXvpyB|??wMZ3&j{;>WY$C63 z6Wv2yZr+GE%0y=&I0HollL$}BN*;_K*D(;@Ty6R~vP8(IWgqb08MgpOQGe+JyMLf* z?qUJ$zgW{aH*z|tjV~E5Vab_&N$O6%Jr@at7GUwtN%y@d+zhW$33bU|1o!FkhLB|K z`1~UwC@Ccp)#L8l*0}=$Jj~d*IBLjVDX3<+-Z?`sO-md~uo6-NRK-clx)5lt4<%7@Om!S8aSG>Amm-62Kz ze#Pu6eX1gC?TaVx&3y>7xW2K_Lc+sxqvJLi8(ELznGbjiH|e|}MZF@z?y&NmR$t@W z1_-&X+=W`YO&#CHn5*Ozb!~BT9>I|0K$wbH*c#CIF}o@A<U|w?UYnEYfFV3%o{7* zH`s(=>xiGH0%OQVpL(0zlZ|G4pxwrb1}aqB*~50tWxILyoBD8fizRHX5v1pqpjLf% zQG0aQbrp9<`L8;`sS~$L6S0SYpH1yGb0`!I-38k5s}=b>onZttW^uGlfDIaE(HV0a zvF{_|B2k?dz@_WV!sI6whEuBN!?dnf2j4$2Lx^5tZhcajC!ofaYN9n-ZAgh|2f6#^$6}{?kKVQ$ihET<(3Gi_ov#Az@3jw=JyTa zaCN*MXj5Q?6BeBBcS+_K^atyqqRMN?BXhaKLvgbcij8W^HSk2uGwGkFupH3)!*YvR zDM6n&eCc1AzNz@xaj=GsU5FnN7$QvKemUNq5rUzjnCW2k3>+B`R(|TK7=`^?05oIW z{?3W5jbY$NOxB{FxPthpNN#kVz(N`6WeVNPho=VbiG2!16;O-BRkrDR2kUD$oKxXll&fEE&^Fju0SaXWf?Rrlkk> z<>5ycK1J74HEr0L6KQS_LP_avYY8G9y(Fn|P0lNxw$w4@oC(mKJ?2R|BCF*pDn6}E ziZIj5X~Zpx%4|~u7R#a!K{6r2&`NC^XbO{W1Wcnz&=!3I;b#5M66&&dF2!lCfj%XwAgv4&rP= zPt7T@Ti`@rJM#ulNTDQQJ)cKujgiyHYeZk}8Ot`2VNZ0Sh7Ax2vc);Y!s{mVP}(`z z3kcA2cQ3$nsINXeo~QBSyg1?;E8ud-t=bu`;@?Vp4$kIcILswIVH@pyDsuQ+c|rO)bTXKLP5-BG8EcFTsix#|Hp5gAc?`*y4BhU5( zLkn`x;pI562eG_^Ki7C-HD;T4T|8jeqjjQJc2~jgU}3>}g>47kxcot8m(7xK!8^Lb z3s-1%pRz_RdK^PEKP}rCZF;Ccquqk_C_GAxA?lg;Q~Y=ZBy-H{z_E>mCYHZM!PUfE5~{}{P){qv+H)Q;qKDipez$Te2% z7R;Dw%Mo#U9VflcpH38FhmSr0!UV26r4#e)ta5sB-RKho+0alqKtDnzZ22eg?3D`Y zGrcuHwz+-xWalyZ^v$`G%w0s&K`lw?33cQ(1CX1(0{0o)nCL9OZ&>~ybI6LSvzeD@ z3+{RcIo{EuSe`V_RG4ubrB)}|dwkqlwg|2XBmG%b@gupCg-9-T#U1jtO=Razbt(P* zZF;pm5L_rQEBm(ujTC3j!UAP{Rs0{lIOpqwZJ;*;5U=Q-b=&&9<^&cD@FpPYlo*+u z;u;^l0q}*SQ7{C~lGyr=o}pFqjyaq5RMP$Og_yf#zRXxVvvHzNRqHh^20`;-{H517 zqIW~I>2W%;H5W`Ic?weAUq;tbPQ8r@f6D}CIz&*b7JOOWj3!!cbN%ovo+MJp>P;Uj z?k(4t34)6d58LXo6sG=aa&#E 
z0mIL}f)ft3fvno^FWaprK#)K~85#Du##TOzN=rKiR=?9tuJOL5_2W=dLK}`dYZT90 z0zGJ;D5(^pL54zQEMm_|3_r2T8I#L>Ei^F;){>?zHs?NHj^r2`zXU+RUl{{@l0o>f z+r1d2nLwV9;AfmozT>Q+{c9zKZh7SIBS(}hwaOS_{Tek>D0JXO0oz+16D!?9A)p+o zcA^tu(d~5h;0B6}eefHdYnYb(np|(HSb4TGi`|19%}0C+Siw0~;T%qs+F$ZUlg^fQq4T z7v*q{1g2XeD!xG)1&2T`ffdG1hi(YAI0!z~`xWU~GqE*$dq@j3gZNOYpnY%hWjjaN zx1=3AXJLH%uM?I*F}Bn(KH~|_ce+b(qi#h(&)i^ zSQX8Ql5&bWWm1CHKom1~6h5=Vn~_hnP$k<3{(rp}nwK6%#&Edf^4GWlR1AQ55}Pt6 ztGpQ!jJdcm-J#Ov3>ibZ-7s8eqYYV&E8l4y!*Qh71lr zSA`m*qBjsA%$jc4A(_6h$^Kc}OS_$$FZ)u>Q;{(;p`$X9F`86^f6&wNh$>w-5o`(^ zvb3(leAUw`+90;x90y}RM=i0$macf0u&&NvLVF8kM-=j{R9}bs_G=^%4p86tjDq8N zm#{fk;XQ|nj)5tZ-Sl>S+K$Kf;9M%*W}F-lIjK7XV3zzvY1CY z+GnXtNzQ2X-0$tlBNmqw5DqpSMZ85Y(HPk_0oST86jB{Oo%ElEEY;l<#rQ;3j}voL z>hZ5q8mcaCTArmsTF0!a>Pbq&H~4r6%M+tZUD%>F5~{I$j*?HhX_!hLW?G>=OXi+x zF#pZ+<~aOPrsf{(a`pXfxq_plBKhV;9zg`&qA_&fxzLx6Ql29DJFk4>u~A(SM6$+s z^^X#Ut0eN3p1HSKb(_(I(aEMHzq*>Rf3_X~J?=#mP(>Im`Er&hmj0)L_*p2Pm@m zMcH5RbJiIdOI(?rml2BeDD{FjG_*HT8v1=H^a*T=3G{~HeG8H)iZj(jGcT}btjdS| zv+IU#zg{%iEF5?r%nfhc`uU4*h*0GWBM!W61Wz*3d2FnQMCsm8-_iI7pUS;(F@FaW zqdmsdPtKbV`4!#pF|By&7C4}UE%@Zf_ZN;r!yg@n=r8TkWxh`3Y5*~2CZmR2Az#&G zFxgq~SPX&^uNJHI=25m*@mn){oLB3Yb z?)Wvv zSv_-4yT2@gyThRogt0Gp&DKa+yzW7^iMG|X?O`_177Ix#qP`ErT}`P9CU)jd-|&lU|0okn4eNMmFuz}S-8Ndu z&XC`V7Ji4I*`Sxp(-~_j0Q~{ixsQfZd z(La*DkQ7_pPqQOcATUzL3oB>b`m#}eY|l=YC_QOn=l+J=#`)Lty({ZEW>r z`}=PA>_TnUCa`P9_-U9h!Yhc{?`S(U?>}bB*=5RjYxVAZW0BkJY4;jnE5p%li!Bj7 z;^an~fiLk3kM=J35HTk7{*+=(sXu2dfkDt(%vQlS`<(I(! z?@);87K`qdxL3}h^llC~PsW*cvI`y``kTNCqkiZCM4A-s6aBGJ=pNW`$e zzR1KD_B=W9`q7~$$v<5PdaPB7Ie(t-#V253- zhta6TQ#)Q4{u?H>Ah0m!$bVgjXZ7D%Dl%wz{^ z)BhXwcNl|Le8rl19Z4yIUuP93hnTeg-nq=C^kclU2MmEE`S4&Pe9ZX@6;0J}!GM1+1G=OQ%N z52}wcG7`U~D$cZVLOn>8x5>0S`f_t7AqS9uwG>bi!BHi-)bB>KBe$;WY#F4!Cc6G9 z^@aS#kz>nDsSG$#$UC+1Qp`^C+ znWOnHfKx4Xm-#cj(U(uRYpu^FeiqTymLxXXg_bp}-*GFbJRi?-o$5FxD??)QQ5*QH zgp4j_&h1!O+u@?ZU;6UOLV6<@9L#CN@4JhB#-cIx4c;Vx0b*t!Ht6I`B>j4#R3@Pq z)L}tINVoc<=5G=Y=I!V+Z!;eW24^TgIWUV~axY<@BJhgtwlO;rYg zkA~YKyAE7pYve!QqM&iS+dVo8-LUpZK(snpLeXrL8bjQ8fS(xOn%e8!^4)CC?5dLO z*I5zeLPP+U&BDsd#WxraX=7 zL8zP8sD2%}uZW3#qKV_@(+o2E6-GorZa?{ggVhnhFu*doxc&NLvxI0PO|;ZIhG7)P ze!9iI348&qOyKqQG_@UDm`xalzI;JN?idGY{8|KhTOiI)Iqr(iSU6m;N;PgQd;Q_S z_`yUF(QTEb=G_FTAv1KTP`<4=ish$ny3G7)qGDC!p=8b2$K@dlDjcd@Qf?5!KLXGi;Mne9xUtF&BpK5|3! 
zT5C7c7(=}deJcz&$*ORe&(y7m%|f1Mi7Ur(c=h>sr(_UbC2nW)w=jZvDs$XSlzd<( zOsD|=$Z{p%a`T4)<09^QoQo&2ev8AEo*FSt4P3+zg}mHm-`67QMv;KYF_Ysw{v62d zZ ziC05z&J0+nIlzbK^z|#) z%PD@2Hg{#7Hs$v=Y7Y7J10SYNU;Z>0-^zws%2Efo$pt~g4QC1C)-Go@P~=(6184cA z>>9tEc43g1W=`o*)~!1>(K+9s+%_0aT6Au&>AZL4M0a2FZ_D{pq8u>{4K4Faio-kW zfufN8%*%AR_J{}fe`MzT94&xVpXgV^XAIIIgpu zB%HNrpU=EN?X{U}bpz-_gXJg-*ye%!Jm&Lun2vh+6b*#$d`&}KhNT#e-LC>r&$u2z z#3t`98yPy@8E$D|(rm35ZT8cw)sXw{a@?{d5ut(}?rGPN-<~;sSo(||1)his$z{z5 z6Z(=Y78uL{ky_wW^}#1(FIi<3+O9bxtwTrt(KFtHBJiHp;;E1$c_9Zvdqv0TE0Z!Y zxtx(6REP&(qQZOc=I(FuQQ2TeOXX?n(<9&D9T`+Rxg73AzS97HRc=dug2Q?8nBh8 z5HFAs4V`%36(5t7=rACl#X*(@XoSqB)(*Zy5~+N7cQLGj7oS)-EIpQrVon;jej>Ir8Dq+TJOXt{{hc&%J#JMIUrC7DD6+ua`gT!-BK8UY*gdV4{OFe- zMTC&3v7Gj*E5%CQP|H4|hpw;BH#~QNbq-LK2qhpJJ$EfQKsAq~UAU|5xrc1 z=)!EXRDpuZPD-TB76WfJV2Z{wr~g;>a4z8gIfMYe$J<~V=+*^{h@MlN{w2;``Q{z` zZ| zWW9@7RiKREeH5PEru!E^hljyoD9(=lQMsBsU5{-1zYVU&Vkd z@r{azHJEsq$i_{|QTkQAfS<&LvY@N2lME()YTnmYqhK&Af+2%tu3Y>lgd*9@&)|0}fnqnn zqSIha2_Y#XBf>sd4YR?lXxoZ&12wr-X)tFBn}LJwD*2tYOjZh@0~$K*o#B8czH$1W z2mBzEj)n?|lQ7~fhW-WJ0Z8i9}!4&O-yF+j4> zHu;*s#8QGtjpLz)b=#=!^c<4?*pqbjv^Sn;6)sxn-upzcCxRcIkE_PHvNj&_g}ut@)7{N*A)#~^nsaE!Y-O(4 z;Cu?nkdXu>w7#%P_3A#<^ez?zlx^)LosM>DtGXT^dgF&{2#gQRzHZf6P!Qkm#9zPs zy_Mgp5T;_~glSVM+#nn9_m9-Ja+^kKkJ4&4P<3kGgoKVrk5kmJW>i+`RhjR?;T!eyRr7eLc4JNe`B~@&{WiXFkh2cw+RhHyT91H&A&m9WQb;hU{Q4y> zZWWv_g!jG-UFb-{1~kWX;KlNeMsFlb;{;#={PiYsGV|k! zdp8M6s3{LQ4gzdk3(Ya`f>`%LG6fZ)Y-i2B+$nt@STjoUD?I3YohOOT<&PWZZ{y9lGku0G>{EIwacX{KR{tAnoTP5?)2TBJDko02_TplQ7E zk#h*p4uw<qJsukp8Y-(FO#2?3$+GR0vYG+qP$Nweaz>bfsvOR+-{ zD2QyV-vESHy2`#uua8MRi8Y7Nu4y_$j=mEWr+=n1LWk(&R99gUc48TT&x{|i=HW3c zc?|rfb0C~?c1Zm(cZ?8Z6mTJ~n(%M&eKQq9Utu(w%n%sb`Yaa2y0+eDMAhExnZ0Ha{Ok#A?Q)4 zx<}K2+`x>RuUaq-ST!@(vE4DTw!=^F)EkcEUkr$4j^nBEG~Q^6B*)xB_ZsVtIZ)OR zSrYHOs|j&3E;4CABZiGLX>PM_Nk$*HCZBor^?DIBMt}*{d;!pr%{;Yd@y^c}d>L`0 z@+L==^&_5BUYfmhb^j6;S-Yi-ng#ejhNnVP9lsOZ`L-gPEL5pAwH6fQh9{XY&T)4g z4s+#tbSvOh>`(mxBTI*+hOAl~x969ZEg0EX7Pqfojkmc>AB;+xegR5iA`H6@1dKK2 zoDNY^F#e^X+d5v$!w2Zl9i$#eH_Jgs0K0&Iwl-Wo)2Wa^&$IPaJ(XD7ispaWfs&yCmPcqaU`+O zsJe-ysoHuONg);qv8`odtm%GyENiwY(*V>_vaPuN25Ytk%Rl}p-T^7g`E}#fm`6T) z4u-KVbidVAHT}i_0Us+Gw}XgY;)EW>ad10_1Mfvo=@rV_IIqP{icEp!6Qvpe&jYuM zNHx4nsl-{23afIat3&W74=V?rhK@xrW|NVWt9zZT?f|r$J)A`0qK(tq5#cx(f0v#9 zz^bQsp4#>$T5Me?n~uI6yC9}rRHQ@X0E%AYD32Rk^>{5T#W9}n^H zU0cCl;%^#j=H*f#Xg@_qlj6}p-qMhDERX4)$$sq^PIj(pG9NbY*2f`m!H=%tY^zW_ z^|geuI%an^8RCLfW(U3jtLUa>S!h8EHE}_ByDK%jSHYo;8E;f3x>7W$6I8Bs1f|;k z)?g7i#ieyM#?JX6DjxJjp%Xc*U!tI4d^P80q`YRb(U47~)H1r3?hpZ}h(j-y0^#72r=rX8?A21kN~f|?6ipVasoOtJHCh=!jsfBbC$bdn zzpB6tgtO}Uz4!2*!y!AcB&uXI>S-?27>o^XX!PH2D%7Qp7n0u^Hq);eFdO|zUG=&A zI&qa0Y>|nP@ClITMAwoZ58EAd{7;NHML)vb*H+-zrz;P(+z;Qtq=9h0@G|Qem2+92 zIZWX}CRcAcP~J8u8mjdQ9De|Gj@-i^|1F=)yR(0&7IuFHY(q7q=A6atj~jxn{@#|1 zTW6e4f-H@Iy0IyY+xV~Q7E!|v1oX|BZyiFNE zqjB39&Brxpct0+M09~7Xefb5nK`}zHX?ofKNe9;3IHn+v#)t*Pwf`J#&JxvXQ2W`+ z+y%Xcl)Rqq|$>>cO*a{^Z**>MGZm`EVO}n36@D*X;Qt$q%QMeDPyLE>& zr769dL(_a9GHWy0bcf*(04y?3LUC;yuZPKsIh?GrxYX#rP#nh`|LywPZO`f~PSEN`%| zBpM_|9YwBh8kfhovGjyhq}8yZ>C_mN)zMsG5PZMtecg-s}kw>{HCmjGF2Peh!J!jH;xFp!U{C387?v{5h6x%qAm za$x@(H~0{XtkChh8Trvf`z1d2PNL)<8DsER6s3cd5ZV5w$QyrH4G&4Mcl<|(o-2k9 zNxO=cb_{MY>-Wh#?CUXGrQBnVTrtlU9w{~JOh^l-dT7I-uZY@K(mGnT8>v!P1&O-T zT3PK@`viJ1(%a0{r-$AO<37<2cS-;c{zwkcc zV&?|-K52|q_e(LF2DJ4*;O|5peaFfD5pq@J=ODJ-L5EPs9HcZ<6}wu^p3cp@KoLh! 
z5+*V8505%%0f#+NXWp4B+5hW?dAMB8QqZ$eQMVZ`996=`bg)Dsw4giiO=kOXMd~QK zuMxEZt#kcY%Eh_KSXlMG*nz%LTj8k{9tS<1?w)fY{=$_x$yaI1r z4(TY;Gt~B(`1e4sbmdu!5gX^V-&x()esmlFLzA>=KDmqGuXU@N|HhWm#w$euF^wHJxvx>P>79v!SE%GN1@qB@XA5GgNb{0DRhTqDND_bYtRGkeeM(D;_k0t z^X$Z}SNPN9N%!=EJ!_0e`*_k~nC>3&R_x0+BD&i_e5GY?T+J1N2+F%MT~JX2JFZi} z%(VL^$Nl9N#jf4aQtkq^njnP zA?IPE#$Um5UU4c!yW%Uq`Diyk#CeS${TpcfkjVb{qxH{2EsZB8qp~YepL4zmw8X97 z1g2zKXV~4t7#B(OEeSUscL{75ad>jb&jEm;otx2y$Bqk?#hHLH^bY-)xvhBGi>6DC zsX2gqr*WeK$P@u%a$zRNxD{a7sJemAI|s}h4JT$LaoJXjGVIG>;lPh1p1{^3H~iv% zS5|esG&U;}`S$eYwZZC^G3_q^joyCGT$S-AKd8P|@Q<(Kmv9D{rTO~P+8P%wZ-`w)3htl5{ShZ7 z3&Jg>8Z}?vXS7ctgVoS4Aqy;9GQSJ~-yxRXe;mVCnvPd)NG(4CtY7x!E_B4|_czf_fDIyfl9vsW%d3tBEacOIjQ z3Cef|3HYgI(DMo_ZO zzctwG!tAa?m5cJk4~9nn%%;i7oY{fQni4rQ)5+7R+_WEqd`KgR4bR6dKFEO}UVuIz zrSmUPErnm=6K#l2ZhViUti6B;Hd!sJ6%vaBq6q`CbyOLJAARGn~tcI=skwvX}o zXmP)K5glRA0@)aM1D49gwu|`5*E+uM>Q9uiOqn*l(;)2hFVNd{{5A|d_z@#y39$w^ z&#kqKdTgZy}d)*9YlQYV1^;kxkoTNEhXGc`Tta8P<^JnR9(RH3DKvLNPO z*6m=Kv=4%M3%~D+Zw}xE)@c7cQRr25!mZ%>Cz^B58OD&Cu-Sb8Ea~H%gXaIT$L9fI z@MsA^;G8qG&Qmmfs`*1a1Q8>>lWPsl8+1`JxaZNFbe(nj1@VWM*N`wdXa4lq&OkWr z+UOYA6&kE?t%%E#b(=P%Wy5eZN0x(f-yA z2D1|T>#5;S*p~{N`?%1UTJ$e&8+vz8ydu>C54}2J z=O&I_Y?quuh88kJ#cD`MG$BV*r8r>uzyDgUI&<&bB~~>T4ovAj#{Px#Fu&%GRb774=_fivlKMk8;}iHx_|tT{{+5B7XxL17oV`RiDlw|#!ycKpMg z<}d$a8dC7DbuceKTKv%^EvDL&k$bOY1CzBI6XwK&r{&&PwAdUn&?)Y+n14tXp-r=Z zHTgV1##`s6HL=a?k4Td3S5qW(`Y9ME)Q0t^t^p0kl1cLqzB7IM3V{ zqGu1Mq6XklW=nQh931xmLB@#{b5YeWmYUj&r80t*BL0Vop306=T_IFQUip;%k&Nd4 z1JI*d2AZ2H$pam7M|R;+7W!~i%a?t$;UCA;uCYJl<(AP*-o z;kW^b1@QtREY}TqjJYIHVHy&j&my(dL~BixIZ#Lghe{1o@39ksboal#23J$Mj1Gf; zkv!^v+;gf>4G6nk->xjWaMT8)iN1%^MM@0NffL)D+I|CG62@MI76kTrZg@@KcrJik zXAB_9(24KaW7Wj`Q*7=e^dwlWw;a~kcW#3Iwj2AJLzTr`{4XEeQ-wUk*U$3tZ3>Z< zz+?CyJ9Vp7=f-yjBx<=T)zhXy_;Y}r-OQWqf*6Z>we9V?HUcuhnSjFSvEDgAnq9;R zL3uxC{r20JmA%-RKYG?RCNKaR6FI}&ZS;bWVUc=&)tUe1awV1&r_!?5velo4;r>th zA8fAj%SSGFM=?U%Z#@9A8ELl?j_E6S-}2~p>KV2h)a0`?ynB$>iNQT^Cz2pqKH!R= zyTj?#lPAzub`STyw?7^7MzZhk<@_&JuY@p`>4=8MttS9Cckv-xNaD;}7J%mWhDpF^ zsHi`i0EpnuQIEg{p`D5q&X`1ZQcp3kE}lkui-h+9qVC=z=8N~W4OSHC!&fJl00XL2 z{tlonIRmz(*Ig{-Obqk)@W~B(}z`5KtBw5#KG1<`Q!64nE5l=IBx6$#R2i7Fr_Q2u&` z1I#k*T@w_DsrNVKRQwqLpj~<@u=qK~=DhvCDC0C%te(K{;u%w40UCbom&z6&dfElc zfglMsEVl17oHX~3%{A?Q>GPh_%19(|_!|STQo6C?2Pj$e?bEpbi$xrO#^TlY?}9D? zM5OYZnZM?MKO+13y9zGgTjxSPi$y#~Rg3`0Qonnf>A69GnR{$I2L~dUgc(+k5J3!) 
zb-GoG^YA!#+8ov#fbZ7~|x0m#3lU;mE(5ETpceC~*$eX>@r zOEN$5AO!%mOJe?(g55iqj01$&ixZ{~)5iB1Esek<*rb68I8$gK|p&c3@<-LI%A z*cw(7Vr13;@~+b(MN`)QiB120!JHXbQOoEPTUv;bIN(}T=sWw4_&POwZoDK|HnHKpb};Nx3>hr|JRQwr{DjgBz?FJ^Z$C~{~Iy=|Knc)*pk3&VZgHO@A~gY z$oVLgAbu-~>}FO50@g?1s?Q3>(u@E9&%u@@ zypJ%Q1}7r1(a{Y6Vy$`6uZ!Hq4fv=A!&5q@L5~>t|K&!{DRCes9=GlO7~Th8qmZ9O z;U|j#JjJ7AVx(8X`sF38%B~fSm%ebnl%;F=gzd7!o_g*;3rk%Q9D!z#EXv(+7 zA@PaBQnn`Nm!D&#Jpf2rQ=NMf?g9+umF59uzRCA8X@HmD6eaM@`nDVX1H2qE|3?cq>-@if z#$q{3{{O|pSb5{eJ(vYrV=snCH2#`?{`Q0p;lMDqtfTbstGp0PA(g zhk9ybGblQ}2iOnJ`U30Hwr-A7Pmowv2|So<*yn*+^vod?-NLACxpb3s`lj(Ozq6wW z`({cVSKyug;|NIZ7kH``GWA)@gf*`h4P8h^+7P#8s4YO&`4WIBK5l^Bq;l_96`{o9 z5oy-uJ^y;r|3|{a3k?W9X4XG}uj!eG3lXjr-U<(4!dl#KvDRm-%vA6rKczt# z7VmVvdH?x?eIB;1ZhQYHABhyd`)w#vqEe<7UD!uK{w5FsHa=E=h0!n$gdzxY5GhXVqeZCd0Ku{=I~q%Nv&M~z zEr1F8m*qCFoTD6>8YIp#&04z3RX*k%NR z*IqMuVCrDuQtFJn=KWCSj36fP`i>`Olg=4*VyF zBrbX_6WBRMXLxB6nEgBOqPTJjx!E}luyP!OtSZN?j{9m`O>F8N|NJ?N zJ??r;hu$3+4$J)jZXF6Z0)4@k@z=i$1hRnEJ>a71!kbv%@a&cmUM!g+M&~( z3q*)}z+d*~{?SIT%nF$7qpb+(DA?2T%SOz?7TC^!{el6VI=VL+I60dNrUC;y>|f5D z!2dSh3rxjh)e|T7<>3CQ1}2-$PKMOm;Mb@SeJ7wYDN=V3UJX3#8!-0JWe#;aRcXPb zr{GLdTblqA#Wm<`Y#RC=%#IV@`!WgaHZ`I7H%MLo#0MPhN@bqP?3N<{(z!W!v>VVV zT>G9;1Cl%Cx7zAYK>n*MWN6>!3aU)^bQ-Q_`{aMTkl zJp_69_+S3$ORHFHA6P7*8MPdZ{Nt(EH z3ejVN(N~FhfYowv@O`AvRmkPE8U#3YLZ+*aTQ zc`0oHo=3oae#y#)W}tT$in_~OH`6KyX027A44v2m4^h_vCFBN?@(95Hd-=Pb=j7VW zwJ=9=MTjf-3Sj$B7eyc&*JPjF-(kBo!2Vb?B8o(5_=r7{+qAOa*j^be6yL47^I(Sj zFsugzziKQ*A7dTrPG+1ua=v!SoE^_P*jx3(g&$v$UB4dc>(cv}nliGt)fiY{1k1iJ zA=0~QSK2Gz3@6B4kO0l!s|*Y`@aKl2vU$+^eEuaR{NPXLDL3Pt!pjuX^{jK^>8e(_ zestWBU)=B1eNR^hOr{Bqq4iH@%1kS~w2iZ{0^A*C%uiRYO!Yio>~2}%#94FEd%gTm znyml2{*e+Fd}BM={ojhbq}`&7I^Kf4GY8F!uV9(T0tXI?5q^*G7+*Hl3B;rIdiSu) z3LoWLf<-ReoA8iBS$Wr$=p%aZ<)g0)t>7m;uuI_{atJH$zD;z9xgW5Rbc-8WtLYsb z0bZ>s9k4N8uwS$C6{|Q>ow7)~B~wXCy8&CbJm&Sqhna71nC|Sny+}f{rEn>@7eml0 z(SZUv0_MrK-<9HCb^1}`G=E)OHxNiBQ| z$n+SRJ?W7n<=)ys$My;~2A7&lb;8=>i=)>e*OPYl8}dahodAokCSt6{%YS_AJvt1e zZLurWSSvoNnk+k?sp}>o2qh`o(U%sI8w5$_5IS3WoV^;JUj3b6rzEHFfe?e07?T}~ zn`&zX-9bu$!<0Yp$?}IHKC;}n@D2COpD9*#2Cnb>fC8T2aXdxp7jl_)i!m_H6lJ+JoZMaSZUBF=yY!IkoV zn@a!prjd{QYNCQ<9W3zrTXO1+&Bfvi1mg)5oD6GzCWrqj#;}K z3mxD=cX{^_>@wKY4T6vlG9GKh*RGS^wevzZ;`*QtWM)+)lb#`FLj`e#6?OI6RP)tt zt|Vv2J%nRWFptvuMk6V&{fWHyl#+a!ORDEme(HqPa3rCF3cRL&8;V}~3QUHB_Hx1D zD)M#%Aprkayg1l{H}C-G^Xh1|e11s1d?Og65D~N`{~_puB(_(2^o}GM!7Va~*Ta)iL_S?Jt8U1mPP-GdP!6|j zv|;t3R0}3!sM;v{9k%4z)aa72tpchBv3JEFHt6;h2ZQvC=+oOE*pxU@wrl6Z{E|kDL?-$Z>?g)cB;u4!BC2M$N3} z+Q~<_#s@Cmo1R6M%mdeAZex#tES`@cAG*{T46-{^iz)?3BE!Ts^|ni^JZ}q-sX0Fc zy`mbS)Z2*xCq-zdZvdYPl3-9ymOVR!QqS=bjPO@Cr5a$RZa>Kz_~s^iDeutZH9}v# zLNVX3#p%WF1|Q#1TL5cv-WiFsyG*D}^7OrJReY0DT_ZDVnu}2H*~Sa5K#Y_x&a?;C zT?HaH@}go}isOc#hPG2mQA^Pj{CO3oxZC|7_o5&Sp8XPA!>EJS)~~{kGWuLgHAK7& zwGr=SBr!p8xN&4DN?qaODewv(j7Etxu}{>?wJ{qju$;;5t9pb-Z@`mdzxi(Ybuh|s z6r(p@2Ik%M@B=X@aqFWM^%UvZt0M4f)YqtV)Sf(%k>1>N5e19i<|d|c5z`7y~3FsXx}t{H5rOHN`#p~E`i$9hf4^N)E0Ht~frLt+vRN)NUs ziK%9sFqD#JiO=e=3q@Q3BDpIe3VyxD1U3ARF-IE;Mv-t+?@`MAW(y4(;k|MZLztZW zA&xvtiQc(|iIoQ$pLA$e7Ymki{Ce7%8Pevm{hS9S;f-yX-AixcHbe5Z22XyZD@d;P zB7N5)nRsFkfq(nZ6n!t_qJFK-5gOK=K(n{0lCx#ka0GQQ0_mI#Gr#1PFfhY}-(DBD z1n5zty$C`7_yL%)4&IZ786R@j)P2ke!|!lzlzf0uM{+JaT|BLo)Zy4*E&%f_S1Ygc zD#Hdc1&>g(EW?iE&jjyPNs(I(U=qH6;vNGI40bK^bW+c#b+*|?Wwm33am16E>fmH% z*-cIH(zVn4*#j>p|6yg+u2p5H@E=zcGjjLM6?vw`eg5v07He078B5B(@5~9t@q%2) zzgI;kLt8#GU7+K*hd4Kd_J~lYlxL0Mk(XvFspLxT4FnQtQs(omGGpI@sF8PPYWC!B ze*dtOO#EC);wOkG47`EB@e9pP7WGdV3)cf97b(!;o5q+!!M&|sR_6p1v8GxPlUZYQ zniAwQHMb%>)L#EQ6E!R57C+hUZx5$K?1q=tn7!$JL)PqSCVQX6A{a;sk$ciDpH#83bj 
zJ;YF989XQ26o@}G18`o~S@ZF^pS@p%KW$;z+BXgf8l~z<ndTvTxq0dzpk#%Kt0?;<}iRxh-k1lB^%FsU*hypNG-tKU_zT~2Mmjr*{PGi za5Q|AZz}>a#0DLoh~D?vULdefA&Va3hbNi}yzEEcprLKPN*bn7yY4+NK`-k(s-p1l zRKbY(;j|4el4f5ej^DWD=+W6OM&yIryWaEI{DK?uFEr9mKTkLGN5Q+dFni(nJ#bFS z1XFU1J$pajnl=qtiDaa2oP0RLtP{;j^E4>VJY+ud4VM$xPyA`j4x1R&G)+dpY0jRc z9^t66Jj^TZn3`Ee%D>+DR>WjXvJR4$Ug_lmPGiyJ5TlKr^dHAY;`ST+xJT{tr&c}F zl`=8kz1fT_8)_f=^`b{6&QAB*1$V-Qo3=1pOFHpu{7LbLz1W-g9zXX_qM0^Tq+(uO z6yCNMT{2hxf>KQ%OAih!Y& zVb$5`(TDY(!N`9N^L~;cO!p06qpsXT(q#1v$ zVze+@tnlyk_vINk!;^r9iEt_Fsir077e7n5(SF24Rk8Q7!Nk(l2KDe5owi>WnZ>RF z0>ceEb!S1U@M9g%GUZ)pr@F_nVNhEHLXugTYjVh-NDA(P? zsUS<4(q?F{fAGu=C)70F({S3^0t*5fNsJ=n{Wjf9>j4avQNq}fI0CMZ-nOyXl#c5b z^ZdlK%ttfbj0%3uFl)Sg=N^2yPDM^F-T$ zd7y}FJiNPT4~EjDuMtNVNhQA#1%D_nQzXj##rBxS|7v7Ly#dLZsjo(zYD8}H+xzm7 zGN^MH?En&7o=4mwb zevCN-gLf2R!Vc=M(=vKQkNtDoXF%Ap`G}ciCrirF2*CKP45hH~ry6@4jZBN~HR#ZH z(M>XTD-r&tny<}5g2}DHqcRp<16p%+Is>y=yTdOLOlhSQqc15AQVOpu36Ow287MVh z^qydsvyvCh^H1~B`|ISy0ThR6x~*rCVHkZLBKFX*9vFC0$gBPTc`!&}!A4~mY$`*6 z&j)1$3Ie2}wf+_*;N6EAF0%$f+|8b}pMK7&vj;ME>)8%`S4z%=h8R}qjJz+hw|;49T>ozU0m1U&%nOJ5Gb|eks-~AhDe4zu;}JTe#lmvJb4mQ zqLS8+aaKHh^~FeAX%FloIXvnqtTb-{Qtk+K;GQ~-LEvzT4L$e{e|_}M5y4DYJciMv z4A%qK+#F)Gn^dhJT*PJY=e9G=+&>BDe<)o_CiZIwdB5v(-0`)$DbEsq7IYbKm|p)2 z9r<7GkpI3qD@B6)g!SAYfhpLmxC+K$XNbYw>7EQKN7cPkkbr1t&xv5bL#~sXMz3*N zjLiCOni|3~&{WOi3t50ODZN*eYcM_6L-iHJ6j)Z#xHc)`T!!>sxm{|luVH37lSBFJ z2q+FXl`>($sQtwxeh*+~ntHWhAz$v@)p^@v{l%k`U$MiWg&Ch3B*;oG3t?W$rXQ@F zqv_U~rI-6w?7LG)e{7olc@ad;nH{8UR045b(xf3q@$nWI=E@_SI)0o&>y?54*D>6n z+S2s{%3B;1`v3*vXlx?Ng?4$HK3u2Z|MhA~!y`+NuUQuJRe@d62mBwwrpCtnIQoE@ zo1wke+ysoZK!N+<&M%EQw+{RTcpZPSKSfbbUsHpPg$7Huoq;gLgbJq_H8$E$ z7lP5Z=TqH{D|=gf6wR6*jP;PM1L*M{ZVmj`bYNNp+tw8I2{Z+G&qs&+ye4o_a%PH# zY)OsBwCh{Xyh<8km63cYQQw1;yTO&?(aI)D@If2sYfJnYne9Xaf~i#lW{kGTt1*dM ze8sO5X_W7JBTUI)c(=2S6(RNP$S=)N8Waq@m#8IgK~O}ER&Fq+vPug)sl2a|OL3do zhm3frpvI`7J~b=C`ao)3lwAxpCJhm&{s~1&jOR0pcjrHNPyT7!q~WJ^Nbr{rC{1}5 z5Q*tdn_F;vwcTX4To|kBp*FZUv)QG~Y)Q?mjr6~5?X{WWn8E)OS%_rht=-Rj`E|&r zh$(H3x;R|bnhXKg939c1_OgDZJ!4+vov=HqMLJ;l2jHshwVK+mZpi)8O0|*V8wg@q zXJf!`U%5!fgcZg+g}rG1Nf>JU-caOk-8Qs;-8K=nho&kUV7_+`ipkn zyQ&WAY7k3WPzWRFk5HdPIOKQ@53h<(m)n6l$3Y^s-4nILAHf;xjQgFfS_Rg<;(a7i zme!G`FSbF&akEGd@B(ewmE`rf*zgHXdFD~FNca?56fFx!-dK59%o8w^`h9QH_Oq1Y ztmh9xCx&eYr0N;qda?eVRMgM^(X*V24tjHAS38y6cn0iT&!4b+7Ka;p;VHwQXZkn4DXach+BulMB6pX7F z8@9*y zyhCg(jue19fB7e}@rCqbq&h%ul!t9L2xq@vYPg?9XJFj?Ey78&?N)ytt5NlTN0|RF zkhk$IR1jawGA4&y;X@IZh_#}Nc1PKg1AtLHfti_#0*xfKX?EjFP7Li6$s+KFT#L*Y zzN?MruHvYKA6Z`1>;p9aDW^+~oF{ju#O|od%%_zGm6pJqHaG)HZdi%~6iOT(Nt>Z4 zu3D6l(E`y-XnKD!dMtj&ATPN%4NKZw;gzPLj4Vrl?1RWzSo%FroPYhOHt*JO8yQ_?EAY)hd1mV%(Oy6F=`RJtID46(Z*^z zzZ0T^C7OII`K4Raf|;`9PYrQh0#pB5z!XN2lDtmu&aM#nc7}mNj9?*R*n)`-$+JIq0P~TS_d% zjaQ^!hLo?C{YgMIPX~7M0D>b4iy1=9GzGt@vI%;Vq9(QuO<(hnXaUF8dZPeGF!Sv- zJ%e7S1Ty2sdk8KFLuy!PUYEr`o$F(e^WAYE+FbIq4x@4nVK58HYfr4gyycLmp?={S z^*%3M2Y{6$3NVSq{e1HmLA?B3pPg;9nx<|ouh&H6vOcwnD+DZUY0hJS)hb>rwl;P}p2v%Y{W3w*zD%I_|&h~2gMD4w_8OG}A92S;MG6pv~ zVKp_-Y4O1e>wcdbHXd<3rSlJQkH0xPUDV%fJ_Hyo6wR{1IAdUV@sacSmAFtrO+j|- z)Rjob?^9p#30|WTX^y#qnb=ap#5!gug^Wjo2$!ehpdl027K{$z%3?y_U>Fk`RC3KSP)Xmx+}|b|A@pWE&Rt;q*0t zI-#6qhUHZvl{80lL(sZ7iWYsC@*-zz)vdS&5hFriNAny#{mg+*NQyj=x*?3G$_gn? 
zx;zb%?nA#vH68(|Q8VEH;+QC!MC$P4T(2SLAFP5Kk__i+Vf8+{Gka8<(|Dh5jAcEH zw^r19d`BX$GsKCTAca4Z_Zn|T-fwlrzlIf+rB6jj8_sl>od-rwr5Qv4o-H2X6EB_@ zVIJt8T1tPd-C01ish2Y@-PBADz|YY^HOh6??3$ zPx1P%|4!>nBymBr{?mlk{U^64Bgb8^>>VCiWFnv#S5ZFKtp{s3+t40724aki1C8!d zZkp%o@}+{pD!`KvW8lLNFvc(1S4Sz0sfH`N6%($FrL z7R06L_8dd251GG0pjThh)tOgKYezv!X(Jg&%0s^ z!9hYRHf-n;?p14N!Dof7Avaau!+z>hz27PL*aU#_j|u7iu$#mS4jFsd&4>HEFQKHB zMxMBWUb|ob0}r;olm4gYqDx!QTCle)`i=>ei8nrF=2r9L59DHzB$8lArS_DS(OVAR zyIvdJi%qPgVP3$2mH8`HX((}YKKy3t-ksGVkl3EFk{(ZJ=Kp_?a-eO(e|-rXG2kPK zSQm^Eies-fAz9H9YRVJ;`0Gz%7qHb|0V!_w{UsC-z}Mh~)NE7CMn?pm{?h+07ygU& zgW>csdDG=c&=;i|04NoaWMba+ukZbT{uJjxi}~YC58;K=1xz2zF!qwj$cu|mu!L}^ zxS&&qu9;Oq0}cNr1c^U@u{%@Wj9P~uY+{&6n<$=}BNW)D!7Ae!e7PZj)tERqT;9pH zWLMhutJU^?Jxnmo10VICT8jDa&_HgI0XB>J=N$6MXq*?dfo%yo1p@FL0bS-LZoiSt z>1H@9;=Bm19-^Px0$TW5AjMNFL#+srdEF;~M*53Af_n&i*F@syjlXkKBHns5{SfZC zaY0S`AX@@(Mqznn&?_G|C87no0ix zl|vvoO>;@}=+RXXQS1*z%2lLdV!@B0g67t_E z7>+Fe0dx19103dCZ(pkapU?FFeY5`hN}6-f>vgV&TlsH1i2wF_{_~&n=ePpim1O6o zz9(jX19<-*Mo_+U;KFk__sJmPzpoGehcEoU8f;Ax@T(1H@ud46TJJ$TmY_rsQ#uZ& zoAI47hGS}fKw_zGMuay1WN6UNIn}*3}+x@e@B8DljKgn|Sxu z02tmr$vW3*+a!yF@%&PJ*$GgQ)Qj=r5&#W%C_T7q+>l@`zub?|i~|8~D)<>Zg3)FLN8!yWnS2fa!mgmIzYsVE z!DTbGTdC`=HGcCF6N^QtP2ecqxH$+1j= z9SAa>7_6)WRe7934TP%dftb}XElpKz`3XR89)q|a#|$tSa19K1`Rnqj*~1z)C z)xTdRe-+Z51N(tJz|3ugqUc>fB(Iu*vilQ7D?pBX^A{fn$_;e~@$%c;8uVhlA|)N? zY7X^Z_G!9qgOI7uac*2>wNu0ni9MTlf`8LMk|#*|b3el)()r~OR-tki5xIwFp!N!1 zjFdjKGIgYun+3}U7bu;4MZ5bISlFaVfYN*vVTqx0LH+@~e^@D?EWRlp&`{n{DQ|7( z&_;*5CtJUB8x%!~0oW|KdNDV73=j)M_J9tWl97ZPY;^2r<8|IZt;JyW_r{Df=I*Ib zu?u!&7o&|I`5gih)&j&RdUpoM;u!AQ{b~Dn?dOXUb<6QQ>ujF3(H^}(&9J->Dc=}$ z&V~Q*aw)JKuDD1d`l5n{&eWc*syLiaOYe*M!lP;_FtD}#ynMr=HdX-H($;gQJmMVU zOau!zkLIz7sTpE5=tjxD({_6n!!QSe#T#p6wTV^Z#O+>MC8oWQ7lsNm2xh)nc?vWV zoI}n*mxrPQ6T4NnJrib9t0FLl?%$WXJA446@%M$L$5eXRo54+Bja*R;-s>KPE_&hzLR$PLzC!>2XMYul zGw?*5^?j(-LI7M;4wZ%{IN2<(JF#mm7m-|gF%S5oJP^kw!d=@Bx@OHac(d0Fd#VCa1`nPk`v)~a7_3Dvq~C%Ha6JlbOZ zL=~FXxQ>!Us?vJQJaO{^>ktP2aEoEK<4aAX-|9ioD8Ga2v*Uy2Equ4*FEQj^#fXi{ z9s-|3*CWodhb1c}7hpV|yb>mOBCpcDf0`90uns`RudhbfcoiO_Q$dA!Y)~v9Lq>scs6M#o!c<0GwNB7h(hkFgjQalRrJ%NPb`r@!upG zf>F%ns1MHcUpG%~qp7Cqx(sc;x6ZdwijtfMpV!*=ngh}SG*fWvM~l?jp-0kcPqnI* z0om`&Yh~5J&+9khG1hT4kI^;nr5ZudFEDAV4^4rvQfYhJQlT=fL0OI#zT&K>H`j2_ zGnnHXQ-$w8YK{{+x$Chn%--r zWOim5T;vO{uY!7hSqVQ0b{X7GP+&3zX&heb@ICFcTYzW*0)BF+@2<2OdYeyC4h3D# zE6Wi5Du}guSN7Ma7vo!f_^QZjeJqd^yaov;P_`xlYBAqi9Ez|MxC+~dZ}~1@{-R125tpf4w+tx5sboPBmd^vPf=?HE~#Itg7^IQ419_&~${oD9B#=%SX{tDBpn>khwKpnneY# zJkFEj1+(#~Kz<2re8c^hwQFw0_m;?Ut&qTCOxMB2ww1r&R)X6=Xp_$vr)%JiF6;Zu zS{+=^6@HWPa=~FP<7|g6Eg1IW-MpVST?9CaYj=VB1Zo5@lAlPut(Z#b_R58Ar7dMX z+b~V&ij2(T!^ZL}VNF?RG&)+uJCgSyoR7wh-EV${l*G~>$?mUPhg?GVfdRW2%lO0&#eKmjid1MU~g@3P_FnyC!p zD2zv){;*KI0DmN32GFC3l!dOE-?1R-RIe?TdFT@ne&k$ib78pfsFm~E@rN8=Ys$}= z1+k2Rz>=uJXP0QZol4dcvFN!#nKz|9CTDUNHaZU8WYa(v728_Vm3}w zyzc~1JbO|-BJT#=G8jRb6 zyw2ZRxvTIj!5n734c9O*SJtK%e4b#cr;w^RW-h8y_6ivPeSZN0s8WT-ANj4RnC|=W z%k;RgtXZD6`T(Vd)<_;m~)HL2KTuzlskr#V$Af3E?Q;NWo z2|~@y4IFBAM!xl#bli_@woLNhwHjI|MUr}TpKx124n!=TIkzlO=iw6{yPfg*ta&&;jrab~azAz-zwEWa` zI$dkghN63LVMdf)4h0rJMJvl-e(GE}C;Z~+*BUjI%cVbQMfk_&<^_y#X0iM=#B!*D z5b)ix?5!1S2nO8X%YvbCc#wJ47U^XBf8^IWh?LAvC92i^V^Z`Tdx(<1^^%;TC47u&%uYM z_`nIn%BFD%^j1VZFBG)D0?`$S2@mRLkDzr_%CK$aY=Sur#^>nI1W)?T{ifKTc5`lY zQbmgZE#U62S0rT?Q-w&kZF_JQQYAkv*+VzXT{|SCW(#ipz+F+bN`I{OI4jGy~DAG7OCaGb$31^eS}gnhG205u$e8Lu{&j3 z6h+e_vudWA^3OyB;{kygdyu8}{tymiPmstg_u=v05M%YyQ^3ufzT$K|tiA@cVmVvM zZ`XpTjU53+`umoNy4YEewvOS++l>!~&Y{&)2-sLwr1TPYGoic!@W#7*He=JFe(@8D z{@4+3`+G@{U`kTnAEA)6onQVl_4Yg56DZxk3jvYF}DA$bk;PEUN4-??;O 
zxr~iub-9{r(qT0}QB9aRmW=AMJj&djf1kXq^Tqo1EN2dZbHEULN_b^3_1 ztwNKN0;~Cf_3eH&lK~31SrM)AJ7HWm!k6npT)=KV%&XJJSaWi;wCkkzbr&x~1*Dtg z{`Tv_<)^1tQJJUtA|Yk>0r}kxj|VS8W$aV&*1wb~r_0ru5fdkC?h+0(OVWqEkyJi> zAc$L$LQFP!gW+0=4BOYqoge8ZUs#6SYM_=_&37|)`T&SNXOH%2-}oSOA^7BuoJF|=pL^D<$=vW$3np=QPtBHAB7gdqluKt*v7;=(8Zl=nS+27Muq~^KO5srSe(upi6F#1qi>Q6`;4vV))Ifxd^A$1i=G)9L&VK6dW9tn=&EG2r z5G*CGP^8oeT3zV~G)b+aw#EPks)H24-Z$ti?#*A=Dy@lf#x(Ou9e{Z852DC+sO9~#)onFsBzFhGHEIt{hNWxtEsXg%tKy`d$VYYZ3c0G;Enf02SQq`?=f}fG zkNHM#dS*rKv;FIf>P5=&T z53k5~HCs@i;Kf=&hJ*)ZwRphY;q^8=w%3#%rl-y#8uSk73ZZ)$QD5N+fOJaHebxGh zS9GC451deAWOYe)Z+b5*+Np!PHW7(m= zE$!_bAmBqOz6K+tJ2tA6Sr5L2`9kHwXUzA(f`N5_V7;Rn<(_M1{NDcV=8f@>l{A)S zT?u`UEmR(@`7^IxOZCZ#f6j>x_O+oCTQveUY4No~2=9dB#Pt&ZD{{{@rXP*X6MjLJ zL}#t?^YHoMVS^;J-M3<%MXXH>p>je`LHt@-a2H^3o5$4T6cB|3CfaujmAg=)+V&B zta2j#dMxPUf11o)+qRjZw2|r%>LreDUH9nrdfho#ILU(|eIgCgd$4ViNe5tkEIB3f zs;F)Bs?EEO_>Y>IGM*0Y%~jWSetBNQG2u>u2?vWF)o|G;kz0gub7B{xxv07LGHfA% z!{|%E$6GQ4LJt0qT_3d(mgJ&So+}pXe4A?+5$+hUDEjlrkKm#d&yW#CKMl!4zg*QU z^Y3Z5mzS}vvLz8D4wEgCQt}Ik8&`~I=>mIHCw7Y5DLr!}FPd27kckJ#2$$`LlC+6C zD1wRDxDwlv^0}*u!G`!dx?Z26>NlTytef9d9rpp(;!bs+uqA1z=WH=y2}xjW7-}D! zyK!<$vO||O?~;7)HTH@#{?AVQGmXCg+MoSmdHjp_HL(yr{&R6sgSI<`{0Z9gpqX@W zL8dP1*}c{(6X(O`%o0c)Nbj+9%mHK27|q3RhRlaAwtZg#eT`BXh)%DY)JFGO&Zpt> z%r4pc_THr>9Yaki>1fqOeMI7jFSjwf1(EiJdMx}3sv7tiIH*&1P78jxV0IgS?!)Sh z3D|9xhbMdd7$=W_Mgde)zfRi?=EZw(@u_8DdWE$E&MsJIQ>ObAv(wCzLtkc@TOuH4 z(R6?m@Pw%piaJMWFIs%RR=(h9T?nbefGSEvOe~OA_KKf9j(xu$j|mYV2J=6$JPJY7*-JHh%ay40Gex#xBWNFhHeIhTfxArZE|>jjONDW^)b!#ho!Kh&p%-iaPEi3e*1PA!Ho4el%00JDh(|EvKNiR5>yn8} zJ@sc`p0c8TIGX$xweGeff8oYY)Lhv|U@K|KbVmdk@b2n#u@0#cBos>c}R*ZWYI?CzmxF4!jYcSp*qiP*P;EhD01zyQ@>mN zR+RG}qOA*aq2lQq(g_YN3gZ(om`B|PlP=%Anrt#<0+KocYJ0_T{0Z}Ts@sYxPH( ze)+^N&R7*qqw3#@`OzRk?ucL!aOWAqgN)K2O7-u^*Dyg>inNJbmIV9p0!>>c!tvfm z#cQ!#i$SkBBu0MF6yfL7h~uJ4$!RP2{eLxAVtbg!fdsOaxxjdZ)!0@eQavF}*Ua!! 
zs~?kbLbRkV4c;U?+!X%VV;EK^?z@t`TYKS z3xe{vq^CXe5;{eLEnMl#it23O+52xzY?ikhEp#`6%XO1UJqkvL8{n$GUhA@!w7)`I z#R$k~hTv*!tO%|{t4uoVmOSl;H*%8ShP(3v952SYI{!gR@1FCHE`|nf1+2!pX>xBe ztmPsRtKM-Y{ZjL@={lCaaU2vF^mCfPN9@tPx^V-s-j`d^ zhh4ZQu9n8=)I@GNuvw11WO;n^qoTnU10!J;m$f^fUL=3Ik>-6BL*p;~c*2q?dVVZ8 z8%LhIRzZ`CagZ|Jf%Ur`14<({;8`%qz0LDPo**PEYwWn!cQH@Ddr~ZUOB;mj#!AjU znUH(#4M>t4v)9*qcmbETj|sy>wNj08@7vtN&)GTm{he8n%Fog#dH!HJ(M z>=-Tj47S=s!uGBd%W$-j!%5n5DSv$=_aSF0$o0ngXLrXRTujznxJ=i~b;9$qg2N+= zw9PQKFdO6Zel)6E3}^PfxS(4uHJ~9TWQVn}wL&4B@X0zxI8!l#V z%(z8yjn$#(nB{k`|+(Y#QbqV>6r?QJ8qQ;M`byg*bML3O-!%1zA!$gnjTXfCmVNIqXGh+M5YkXRG#4KAsCe3L zsL-djDzUJ=U9;CPk&*7nyLY(rY)r&xFW94AXCwb!dh)pk^ihn5_LL7zZk|GBeMRg6 zJ?T4Rm$1J|y1dofn1>5C^$2AaoYeQf75+7X{lD^S5( z8X4#B7Qeq2_AZ<0;E4H99ETgCwQecc&g-!|BqFmDfSL;mA_w;P)bJmLs>WiA?JuQNzd7%hCFBrF`|Us7B2hY@j~ zY4P%AqWXo^x5Z9{F06|%pAFZE%qhNsMmvhyu-0`s_SF1Sm=g5+aHW0}nz_br82lRCwShU8*u z<GI>CdM0fU09{-FP~JmflQuk`kO-Y7Q=)GDg5nkJ&aWS3Q(lG>a$hM6%yI zLGVAn>cMjoxpxYG?V)Y_Tjh~(^C4eV%)-5<$e((vQ^;d{8`TY$XLH_#nS$3C_bvM6!DXEwB9!dyzw5oU4I!{7HwsG2 zPj6nkyvJ!L@6~5UH(=8eBYeosm{6)Jkl>iQ;Bg%%Z5LDLD5)UbF2P;?hsV!g@hAUq zeAU8VhW|fHiUz1&#yr&qOy}tsX@5ECI~m1`q&)kdSb*o_YrTN15QzO}&KtOQsyoW} z8so65JejQdq(x^b;-2Sk_Gg1&R?OtLF*nL5l=d`Y8Ph~I54)V(4v8YjkZBivP<^hk zYU}ZM(Nf#IDIImAc*(?n>w3h<63vM453{~#0Y+A7qu>(PZR^V-J3C3Kc50Lt=NQeZ&Z|L?3uaTgPW@(bLXyw?7H38CL^y z5{pW4VcLVB>#|~QMZ_02UXaHKcLkm~;FolTN5J@PCLMW)Hu zJg)gM%dkizE}evQ%KYZb4G%pIZElnZ&Q28=k(dgkXyb$D3}jQ&|862uf{mkV6J9my zZO-eWqP9yWGcp8CEu)D&0?upkXoD=N)>q?mdXvo?>Prye#`;Sm`RJ8am+KQFw_rdZ0hE8i3MQ7!j}9OrFPzi?+Eeg8U|wK-3r zBrG5%8(~@vMVT|IY&c~E%BCa#aV~Sicc1DdZ9J9aZnT=OY}&sgYQEnSMv#A$w||57 zx7wBP=Hed_@2AQptc5o~D~SM_gqHqRybHZszzq5+eYd;7ZDL*TIO zYg0#!uv1w^w}yI`w8j^6&8optn~8=Rj<4(-K{ASV&E~fc;{5nRR3}SC9z)E^@CKxo5p}SSSsAh)~Bb*o|5?isx)1HHwUd^$i-fKOSM` zYvRIe7C4lH!^hj3_{K`3ztKo59-c@tO>swG=3^75DaYqU3SF1FF*_gzTWp}Vz+aVZJLmq}E3&rDm1&d1N@m6uj#CG<9nZL{EeEdai5N zm0FaP4x6E{fv+eY^~1|)S6SI??(mY#yScV4vu?xEJ*+K@9ddF>BN=Bl z&M!6dGraSfUdsxrdn?E!4)RRc$Z>mSVa!{=j8jxtLY@vf7)H?QSo<&^A67p#d2O~3 z-T(4Y+IB%Y{uZ^{iBsT*IJu>VMJ=nLcZ`O5nDr^&sEKWR-Oa~Mxqxs2I! 
zh+sAZu^f|}J!gfQUtwG*`643TsNoFuuVW+L2?$st5io^8eH zWjtxB@_^Cm1XxEdTzr=3K0f7oq&r@kBZ-361?~e8(2kD)X(ZEV)ceZC)-8%@mxEWo zsFaf=d3EPZlX$&3sSt95qz;MZ`biPZ{j(pkQRA*~Zb z&=FHzw0+lEb=z!_rz}1E5SuQ(XfL2^%DK0cyItOYnm3Ttz7=F3?%u*5lU^9P@X$)T zLT$d*!~$`_&m^r}nZid<3PlBXyFb-I%a~gF!?OBe5Zn`k0&K)M=l8M9srfnSEVJ{s zGa(Ya1ub7f)p=K z4{XFqTATbCPz~*i`UYqUsP;&4&xi3v1A64S!ztX*XZ`R2Zl=J`z_i_5N;^6~+v zs)xy2qqMDwL}DF~OT{T>fq}~}EvW5JB@yBbs4S1pfBNqvz1al8$rVVL#E%2?Ws3IM z(d;UIKpn{2Cc!{*k=jh%I)SPv;JWSvqL*BTk=q9sHi_maTtAU5 z-7$QUe15SBnv-n;x{UuZ;PR~#v>gM0b)HplZNSMT^osD-J5`_{ z`-5qJbnkCq;J%;5?fn%_vQ;m|zD>@$#1?qvWogP(mrEFDiM+a{Z)PH5O>au~abslGPO%p7i&==grG6dACE@2X)w9U0!24KaeX2RAWj<%t5O#?kLxeUSz4o%9#m$+7uXtl5Uj2VKv^h?YWv~@AOR9C zA+c*=k*NJvQhP93xo7^w=*_tpm^|QLJ306;t9^9O=?v5r?(Ns+A9%Y#M7Si|l2AR9 zKnjhHeK800mdXPE!^K5+!>je4Tr)Ed*uv2{Wd`;Wq6`W3-2IV$0vLcRZu^y#Tz*ZPe6jFG$E9^td*Zf-0zt zyo#$A;`QF)S`jZfjszc4cr#)M+1>St24kjFT3te$Dp25$DJA+H-fQ_2bOAyb)9))% z``d=1N2#5cd@sK_20U%UlgmbMf=b+A+F5Del+~Pj-SOn5Jst(M0#^%qyKbE~Faywe zn*>$xi+L4qD>j!;c4ecj4TT^3kkWOsazShw^I{pITcY2l!_z2X}AnQi_=5T3zXbpfm)W_2t=T~y)d_ZQxePEUq_x!$#@w~pWkUHg7T<&`% zlgdN~)QXD3H72UM>eybDwiYWtT5)Mh>FfM$#`I%(^5V~3aaLGVmo$h?@kp5Ry0@6M>~@>GKjlN1I@_WxyMc78^gN?T zquTO1h?FmR#ffz1+GpGwo)mz~h-&-Awpr~yn4sU>nL&EZvs`)ZBlW8R5l+};gu?kr zwSEt}6Pt1DF6dyLAz?F_R>hMO(3=kGiob@h zH6)8%bF#Y|JOx-}X8mqL(e3sleTIt9s90JV=3z&7kM7$rFbl?!SK62w z^yrPPI*KAFnSJaI1eG&39U>+lO&#`_ialRc*)}G2oI&-rqF3Nw{wAospZ2O_F%_?nu%`4UNu*5(bk)M3yA>l581~h219Lh5|qcZrH}UWPjBn} zqOwR!#s&U}Cl;RI`Dj4r6Srq{!+a~nL-g`iz%ik&IECNCgM{xwG2f>?hxvfk<3pEg zi?;2+-1dlQt!Uu<*CUmPCA;BM*E1EndoB|b$Jv0>bJms0Wpg{<^41iM=IGQF-{A{1 z+M}H>3-o1FItvEhj=aFP;q|-{lExzz z@6wf8m&D-HRAYjJC$3Ow9GC?g5tnzzfknLe~N;tLott4 zzNc)pjVm8dW)(l=Oxdm}U`Qet9)Wsm$G(q5pFrslLLZpF-XQDcHjH(=Q=l*Eb6<9P zk@#vfuD+YY*<1Zo@+Ds+(@Ce9IIHhKsK@ejyRfy*)x1I zvL^xebzj1NIbh?qPMV7giKZ8+ayof5UKUg|#=D#Nz7H5fUmerj-fsOItv{uekXFcy zr#@LLbT6f9D~iUL#WmD`8wCE^`DKaiI6KZr?M*pUgQ^jX(WQYObfRrayRf3$D(IWr z;b)(f8UwVCOmf%zuN?-DT|p?;D(Dx!*{?7s+1V!_+qQZV+Vqpd+M)*anZ}h{O?NOy zZKVGlqVS)1$5TARDTK#kj0R(2SLYlsJ+));(X963o@umE%e)}*5sT0}T#txH^R-33 znI({(Osgr@HTqf&)T?&|51wLG7}&qAh&o2z>Adq=HI zvDuX~deaT6PkGTR2yN|F1@UDi44hRL--RO5qc310w!)`Lvu?Fg4jJ_t8~Aj+o#-l6 z_0g4oLUj$DJzm1vT1sw2(X~El_$S4R^Gkso>WRh7FKYHjM;->R_{x^Kzq1x%(T*?J zx0ym?ErxKpi@F9KOF+#v_kn8~KT~=EJtAMsW#wqla9JP77k-S)*!nP@?w;AsT@{%O zCY|c*t`ED&1+2yT=w?^612?nUo~^DBFC!Y~;~7$4iakkC-#l$JID8=JVAA8~6(=%7 zF_E{6()ZcA&~*M~aHQ&eZ**Ze$MP!X3*T2Y1dCm$2G%4*^PpOBzc;0Pez{k7pQMxi zOmzkSjHMD?Db84Qb~I8K4-XW!+pQ=Ve(7{rqcWOxCThbUEeoeO%i1t)`+P`p#i5>K1})%>jBBKu9K5}KG!WB=KHqF``vzSXZY@Uy+CYBLXwWzHN=F8z|pkK zz&od=w~(b>w%NJ-&%{>T}Ce`v7H8YgYp{Wc_qLjos zqYe-TUzq*HiV~!aNL}*JeAHynDg&D)cDJsZR#%Yzqn1|jifBWM!B<&1IS}rT{_}@V z29uyT$0C_I??Hw5_ruU`=ew8L!{k8}opZqW2Oa2sAZj!aTa@n1z%0>pO^>uIjrI<# z9pt*gDWYZbD4|#_t*sT`>@OI$PULgcC4(6Hg^I)O+`#f}pYA@I;H1Q!#iU=FYn6=P z%U|GnBwX9aKvylPGsEmD9}A(*EvZC3+q(mf?xRpe?K?F{S_soiWTSzoPy)~@`(7Mc zMcgFz8#}NqLHPC5@ii%x@@z;SLV7Fv(nSWr;F)T9lCP{ys%1Dm(@R|0HiB_vZ(Ufo z@SMh3?H-TFANaU7mX8nU^=-CB`GUm3n8ftNl=muq3367v&oL$}dpwLc>S z3`GZDy!RxqvH3`-`9l>IZtARA()#Wb%99^#-sOl( z_B%d02Z=M4KQLW$7=$|-i#M50%4l_>+2Ntmc&$)X6k(icG*~eIovvxV%b?2aLVNRw z>#S&B>#IDR752Ny#C`Es{tXw6*~QP|a)MUY&1V`cs`r2x;@I`LP3~Z?M=IW^PC{VXs10LgbK<+KB4#2Cf zt`GGzUCCcW&`n+qn10nBY;ge3okjX56k4tM`S0CNC-QRe)eXzC7#x zs0Te4FDfaY&p97^S;}jhPLS^<+Qp=hk#CsRn|`89u3jT4Y!Q#Kzknzc3zmn~CF^<@ z2w4w;c+As__7}o)vnw9t?yFf;$Vmc%T=y26$EMUzzGWl^m_W*fesGTE|KfGji@?8v zz;-91?VX(-K|Axjjv#PKZYJkfS5_37xYF!HUc<%=ZXmxb-Fvi2Ig=;I1j;#I@Hs#l zLc8VuUwAtk6Kc#R5U;>+&cCrWw34371w}4XFtbT>F?WHViup(Yte@~+LKPTB)xtS6 
zUZI4U=k#G|IN^QLof1SD&?&dRR@(r#ec7b8(13$XtQ&2e&j#qOsE^yC5?g;S(Mq_Z zXQi_O;3?_slBt60DW1SvUyAys=114jsXP$nVYK7fY4C9RTF;z-#>N@ei&G@JbCfpF z%g3Nxw6B0dfg+KXh`rsii_>Z1qn8Ct_ynNS_mp_LrS}-#DE^`+XNQd_wETM$L_RqUDofh5Xx*KP8JeS>2KcH%83R9dm2e+RPGLs%pfWJUhL6ANYUv6MW43H41hA~Dt$OE-Q6pgz&$)$H$6LLd>HG?48H z$8=msbC;cO;vjg#Zh;yhxMl!ZIEm{}-;`{hO2w^0V@Xd~g2v;xQ@jEO{`Og?p4?A0 zqchAPaBanIJmERJ>FdgUk~72=tiAIU77s;q_vc7Uu+QYOf%?;7gnL^jsHT$wIqh~? z3fZD~XQ-af+Yeov9i&K|Kh7H%_uBE5E*#Sc_~T7(hB^>H;&ADvPIOfm3D z#d2s+^>Qo7U6$+xEv7QX=fx0v;mte3Z{g7w#IqB>h-+!SKZ%;|gYyNuLt> z_uVM?g!eIFH2M8Qd>=J>V-jldHF{tFHGNHm#Bi%a zUJXCYX4g>T&244woz4TFH66OiEmvbJY+*U)t+MsLp%}}1JY6AIWT_ZrLP_t>*CdzJ z!1fBfX3v=Eb=r#3sEvTjGc;lsNoNE^&EagAHhQ)4r&!N5;=%7EUeqob>HJV!lC6c0KC{3}`d5SI-l6%3KBV5EAY53Rk9ILpsMzkB z;%*p-Voa5MGQc9@>KM?sI3;tXQDv6c`QtV-`ZPD>hCL=#(1ud(-$s2Ynb2HY|fxrd_jnJ)|X1yRPk zEbsC@9d}Ewywb+{gUbLoWj6=;FQmx&BxNO!cPrhZ4jnSdDSv z&1Rrsyol|^WPLv<%O#@sGF8)4EB6zHGv7t{r#OKEhf{ooq~jzK~y_>Xe0N5Ib!<5y5)**srS><3G|mq{uvp?{H!1^Td4+XO~3jA5fL(~Nd$ zG%stfUmKc8P?A3U)9E^dXqbrbh^Ez=Ts>XHHdfOq&B)$W7qxE=-W5cA}`wM`avB zDZWk&-BV0#Y!pmv+KUfGQRxx;m+ln&w3|-))?jQ$SKuTmp=XP?f?(if+VKHT+4*{+ z`M48PG0KhITROh&Xin5KLc?lFZd)0qYe!xiQ#fVLnAt*v?@XAo6ejSqJeb5m+Tf#L zg%PT??7}s7euZbhhc#YWs^_y0ynt{fyx-ZYv*!>rRKCl?&RR?GPOh!fElh|E%fBrN zs^p|JF(XYkEz`1dMe7CGoRbui^DK+a1G`aMEg!yH7B*WECCr61k7uS5Y*b~l-ifbN zVbESTDs^Tl_7+BvCANU4UbU+xf=%f%5{ypVKfV&XLcxbw?#zFU$0+e#7DPU6y}Q>M!$>I)wIGCkzUSRa%x^!HW`J#V6-8e^ z(_RU-6xJI+ZUfJJg2*Sj%%bs{yfkk3VIAoi|16arglDCGR=6F&)&yhnl8H-&@0lc z=XllC*M-R%7$yMa;Lz0=oVT3-zb04-5^t%@plWon4oNf-!%C>zKPJf-n8{=)0bZmj zAL)Lw4nbYjAqI~*sPSe~eLK=E(e@UVDT7YoZXm33H5TQ`u^3rNLohn)I`=_Tf=lg! z77=Vj1ZchH=o8R;elo?<+-x$r-b}sV%%2Xt0WR^0&Qq#L`tn7a5-AZx`So8kwp!U) z6se*L$`eM~>w~XQKi$enl){A%AHv_&xp3nV(df~3Pkz?T8_`6K7kRIH5+|@-tQTRt zhqcHYQ>p95dgF*DXu4ln<+0DS{*20~j^^Q=qBOlV{afaxs=}^Jp;=}UbB6_8CjS%d z@C##yepO@nXT4PeU}AJX%QvZPW3&!_Cim3$}D8PF3PXLRJFIaKxv~&%+!UmzTk*Vc%K4yU*d z@}qk_O?F1gtbF-#t_)$|_X&J^o9-tW1)&h>BJNGS@C&FA^RInmFJ{k~8rr*+JkQz9 z@M-*ErCyWio_}%?+8`sZ6i^a}%^Tr#Z}`5ELyv7QBQg$=nhZi4@}MT}B2q#n{5v0y5v{#V? zWx4m|{s{n*TQX|5;j@i}i%~fgxnz@g4ibprxckk$zgEaUqL&#cXU>V*Fh|gupq#?) 
zd{0GM^Li)CZ{<(Oo??L|W2R%TM0nbSbLzwXE7KUM>3DlaNh>%CUe(oEIJ+-KTas-k z=qsSviMQ`Exd@>u&{R@=%ow(T?HV|pHCB@NpTLlj(&qp(YtteoHW^TqKIiMp?2$g# z{jxG888y{Pdjr-=-S=A^Xn@Rz7T9O6)~$0*Vi}V{;I@NI{7z1BH!Cm zOyFXGU|*H5GZs>tC=U0MraWI94(+U^g&&`CbfY+v+7&M>4ks75Odx7Y1_en0G#LW^ zPph#o63D_gn$ICpn)f1zhp*rl)R%S=523B(wRlD6r7%dg2z+@YV_*7avv-?h z9r>jY%mv(pI`H>XW#Da4TD{gwMpgJ|jLp1VvB@CH`Z3sGT~!LKO)XeD$u)pa+^R_E zdJ-7b4Sku8dt5WR~#9>*d4t@UqeEhK*h27|$VsZIy$1*p>!_=@1C?2RU7v7GxldJnCHT9_^% ziEgva1rO&%skTR((X)nW!|5?QC|Y~*`kcJhU6OMrAN*Ur zjlP|cQMH+uY%{0Qs6@s|socL}HyS|c`W)^>K%Qcy&%N$4!m83M0*&+hET+)wDC6Jb z_tu#`AD|n>$?RsrwIvmQtH9RWfE>ZA6$j^vVBa3hg*B0yWi>uS^H*QE=UB}`#>bK$ zbWpn(zR%~4AJ*nfx3z%D%5`aOTcy_L@IKytS__UV$-{uniyK=7bUf{odlP?MY=d*B>{3VD|ZP_Ti-LdLKYH8NoCH8QW`?^2}gBm(dqY5 zn}ei0T?URJw<*2A*hj*(`^yHI$Ki>4XCF&GXx@_E!I-;#X+#AAxmQ`vB98tJV*m1g z9W9#ITn;Al|M-cF0)>+fwV|}R*GPP&`e|n9@2IW27osuA2SQMzdF_G93IEuvNkd%O z0nw;)o{huIChna;NOPRMwUdtp-9`o3^Tb zhYaP-TEXh83`w76sni*hb zAlsf)rC>^`pQXY64k&DzhCn}FLC+B_GDV=%aOWxTwwU@v&j~ik03j5iq!tH%o{3cc{LMs=yI>mi^c;w?vT)Il{krOQXy` zU+9gk&e+G+dP~mS0$6tbElU9YQeDnFJq1CHL*T9I3ldGN8u3CO+d3GF(;(gu1Ze6L zjOJrb7y>6B2x$Q@k>#bC@a;SY#LHhf_e5e2iWCf@f;R=290y!zwuHIWPVX3+4GUXIdi-wUi(sxJacV)OK3cyj4ohD=j|w^h0s#wbk?vex3D132KUqa% zW4|;AIr2~ylPG04Ok>rZBgBuVaKtrJB~JN;tK!PZv)CoR?x3Q}OG?v3ChO?xI6Mh; za*dzo({Bu1-*WhWu%ZgT#3Z5i8pI~EPMz9u$IcomIj-x70~f;qZnSp2ZR78sn{vyY zXe^|XtK3}OT{(HRSU{;`{rNR7*gn?oiK9o_KH`=BfG4NHglkouFHJmg5}27ku|xQl zzyfE4Gw!*B&UsA$j(+0T&6})|=YR}SKvEr==+Rawxxm*x?bQ!;pI|p7NY{3XJ2QWn z?UOu+Xq!HzwxTyQB=k{^0NT{ceUI(rbOv|nq1o;uu0JQ7Pm+N4mpJNjz9ZbZ^p=V{V_ zz0~7c$5vjoFK+!K0qd@d4v^}bihu5uW3WxDss^lJsnJl|KAr1cYm~3eOuH&?pf5ZsOfEM?9_oe1M6fvG2}s~GGO5JIdqP!e zpSB6m{vK1tN^zm}yw=B9&*E_VPd0BybuM~CdS%(d^h{O|TGp=Wy&!hhRGBp8Cqe|% zdAk2PJG4w@%E%oa0vqDRhOp&xhDq}9?%rUqyYA6~yvE%M5D8TO6$vVi0}k2vNUjzz zF|=r?$sZ_u0xqO*9gIcFg(%NnYsLSZ7HA1;vPmkKy+0t>z-mV!gsNZJ0u_cYqcqnR z$r7pH?59)!aF7hooydJUI*{C?wfvI<7}7#XmT1r>Vi5Po+*u0tpVELtZJz{(9sN3^ z*DDSIbGi7a#^i&}EOhT9w{+@gX-(rSr`*E2Hy0$MhW_tFpmmx7V~=?9vVSeQ)*hwT zTy%lWdi+je1NB@D+Az?^qhhvvW$Y#2X+N`B=ncA3^GNO7thG3L__?KV#kl<$jcTa{ zs5!m7aTUYKm=44+k#afY>W-5Vm^AsT?$Mk0zR7sH%+v~}DQAdJ{fU?SSKHV|57c%s zS-q?hX?SfYwLtM6{osc|T`3V0pxw9RT+0CZ=r=rE?Oh)f?T!~P6=&s3X2n&&Z<_=X zyEvQRoTdQOtf<-iIIY>sVX)lP(0qz=8xRR#2YLU5ss>7J4e}L#y`Vb%!?%Q#tD!+F z-S!D{bK%u~AeJ?n4xKe#!#*bP@M>G2CwuLDH}bh`aRvd;^eGOa>Akwcl;f)zQNq>! zMrfci#NxR(`H@g@%W!L8O%v;?4O*a*XRO~xBD=}c*C-R5>(hGWMRfRrll4P%L zE`EM29Nw)3tZBGnCW+k^3g2vkEjYb~?Ltbd%7citjd*UBKPvoYRYA93E{n1REbd>@ z>~GaU4CH9l&n2FgxhyMo)?IE0t33jK*#)Lhu-SF9^yG{|%Atd~t!IiEQVw@&(zovL zQSlWG!i@>33+k`YOXGJ$=P&YxGMk}crVxBm8c45Dhb^EHWgz2xj!*c5Bmc*>?!38? zO5p4Fp>!4w5k|$E+}kcJbcpMv=b_7v;y+i`3R+!ZKsV#5mm`^cAdkA;=zO3e0WAC{ zWlQYowNJ}FJu8$V16)7j zJQ>DC5ZhN+6dwB?@$}`ocyB%)+Ql_RpM`l>IiEpj-MqBb5a*^K=*^{?#@~55F7J;$ zws3_8jpZDRn{Sd%;A?3Q@zPkIsVa$rP$7F@4hvTjR9#5ZI;=*Vz3*D(5z1{&rQBx? 
z6b(r5op}#do{9zxChr7Y#(SsTMKB4!W$W=Ybi_8T5fA|#adp#^)M7dF5Er095Vg1Y zi=f7&S2{OQ^=sK{b+=&L8{mX+5wj64#~U|v%2K-<(SNGnyq(`0{zPsl&gUAClIcTP{mg7;fNzSISfs;@+Fb-Qw!pc^US|rCi z;RIo3mIIb~VBccJR~QKIruTg~I( z9!$ldc zv}^%)m(+~KRcD99*A3MCt1eIg?nZ%P<&pK1uDgYY2RaOT|E+^2Sde|4Gy3sq47ULC z!*&kp{sKid%gNrk-} zic{^vh}Wtaxzww`jB;H#{+gLMmXaa)9gu7xW0{OV`jX*VGMKa?n&BPc(YTMk>7!vPpQ z3q2udycRm)XT|`CBPKFerw*CPNF{YL<(PeEB5W$W!C4wk>Zz0n(AN&$lWiKO!}nRi zC2$->8SPBNoJuq+x#hVVSt!z&V7&z(w zkE$PmcenK6F+5D6Er!6mkYazWNYxG%?G%a1t4?-QT6~bfYs|&t-3V~YpL3SqB_7w;YOvy8w;_)iy<;ZE4>+L=8p5y7^c>yK5ghuzH zG!DP`N&-EUvD`>T%F^Kiv5Z)!g+2QwOHRF0kJM_pHWO&GB`3CqkLGVrOU!MejhJ7x zZZnN}yHoBN<(b^+X0Z*!uNOz1WyDm0Nqbd7sR!O>pH#Q68jc`q-7Za0ek9>sq2ftb zM`5coUS=Pi42y+-CGU7mdR%z<7onh8?&eOt0;*O%X`fLooGFt+eY)xGakwR8IQr_2 zJ~<}}t2xz|iwOc}8oGtj(7I#*~6}tmV1qv)b1nMZr&O-<& zG6Nq~M@cTE#Qp@HK2PpmU;g;cIwTmHoj=`O$S8G1KIYb}zlrxi;M9Y{m3!Nh#RC^S zCv=S^xU_S7(GJ6dvTmsq6h zo-Hw#x`gIdIQ>)Fn~3Y{Jo(pA9S=A}*e|h9qBC6t-vMZN0)hIQW17+j1Q2*#shGa9Sm%rSfNu-4rqEYQ))YsQ)wD*No0+5LsQ^ zrkMvWzTuNz5V2j^dgI^~r!)A@L}}TrqAUmNNG}GA9_BRx5`aps$2JyK6kZc4c7F3} zqz7{CW_|Vw0c{z&V>=3QJU+KRKNaalGkk9uYfyl_Q}T6j6Z-yFvkhifKx`5MP+Da) zST|(!p6r?2(pw#BYXZ!kXCn_#@}HU*xK=e%h4}}pH{_FKOoXYko4i2XzxY|RL+B-7 z@Fmv3_S;7YYTaq<%I|R)>Tz8)4*={sf&E_P4U6QnkDQOK-!;cB%L=;J=__gOvy9Kx z+V;&xYj>f^Jblk-`l(f<-a-gstt1WDd*|<7aJ8+s0iu;lE|LCYujkYLs&`eNR34}C zbr@aO^Q1cq!kKQWl{<4Id4MH67&H3=yn>(lqgE%?UioaYTF^`$y25b%vjr`59mP{q zqPg9i3WrpRII7_KS421!h6QJNmAw}0=tr+g%A2Xk=3~m7LrkdxIjv51dR*aPvtCDC z`57r;Xpp^zo^!nSj3aB(Kpl|&@lm9&zKe1t$d&epWhLi)p9eM>$`ZKdk#P01)WiOR zXia@^kuqquJm$E=(u@i@k|0xtiZpAQM`t@{&*5LhiJgmC8L1M>RL;!=)KX+v31E_Y zsUISEc95vp%yZemhja&|STBRm@!Gyck}Y%1{Cfeztebro1CKHR^pZnCxiicxv?2lt zk#Hk>Doj9@&~*Rdcd!flmz=)s@)4(=W9vb(u6LgKP!wZJ&H$(-{T@;k#_vuim_S@T z7B&?^Sy*pmK8aZ+P(v`KW6XtEVBpc!(S zPphcO=CnG~TF@xBcFl(vjKl+VIH0Zf@nW(5kl3$5cX175zs=}n82yk|Lz{2vlKOP= zKEl1Bhe_b~@YmcaxCtJKDk#|qR@hY2<{vIOt^C{s&uoS;-7S0_6Wty2((x92xZWK4 z`r}geubxZ>aN>nMj{#c+;2Y<{2zPMc{_HCeV0I}H{eFWP?Hlv>SGERz{27%gKQhcB zb|z|p&Yqc9g6b<3ANLA`p>O$Whwu zt&OKCEw=PiPs00)(qOyb?U_+(^VOtaHo?n4p05KH3zcXOMAnn%k>EpFV6^KAD8LY9 z?7=mi?u8no1rW-%=ajbU<^FXZ*K6QM?4}6xn1p*N%vYVpT*^o6{;7~ls;t;|$UTC& z8-1yu#f4@lT0Lo4L~i#g#9mt)T83G@g}tpJcoHh->PIa)THkg1L6*_v-=x(jl|{*# zs9=HA4>n&Y**mDbrft$m9rj?~m3xjL(JQ}lS zaFp)H-+WmH2WXfWkfEv6I>1xjHkASCYtXk{)w@0@7X|u)Y$g~&jo2s+NsVEIt|G>#x z&a;V9ERRf03Q6XAPL1}Hp^AjNF*0IyRqe_(14$F@?yeyBIz<4^FB!h6w9xF!;F7n-J^)*hJaIifBXH#T71sS~JAte^W z29Yo^z|@AnJ{sNK_Im++@ZQ&OL1&pt;-}*Ml=Vj`As2Sg!CmSBi$J&(^a+Ps~58 zvPZ6TGu4NsCXj{smtx`FL7ubGnfpbcOrPXv@$%K*yflbI4571-6%qKR^d=CPT30-v-_yeNIr-IyW_Gi(&Psn|oCu<%LB?7qA8e?8Yfvq28usJU zfI^(!cjowNQ@Uy|2JGq~^~^^B+w@!hlm!#+H^ge8e6$NnN775dfIOhxux*{04 z|J85`E7@=0bEY+8sbcWKjZZ)v~uKFN@g`7W92bE`~iw;K-5!eMy{=OfsNEe^4DI>BkHH(|c78E=-3Mui4#O+@+ z*yV7@E4tX7_>bEIj&!(TrrAM^gW{e1`$qH6%j}4#BhZ-itl|cD6q3~2eGVZ@(-vuO zqj)>NG!j$+$a6g6cWb>rv({eaXP+q&lQyaUHa_>~NObSWO_V7~d&Ud+XGw`PV0%n5@1&r@mo`lwfpY9Rr>wH60s&WzXlfwtQAlfk;ONj_)8%%lp$E@#3*PF)sN!hk2iMU-${TvcdW>fHr9* z%XA_x!s~$X{^e#wci$&!2?)1HC`Oq04@8hN(zYSoW-uPCxLVGg77&$YAH(C&>$EPS zyYOBA;%9|OQvjOA1HsVyD*AlWj~RIp%86aJ+Eua*oNPHum964U6qanGZ}QWAMJg+h z)7gg&iDIft7uGbFRaIj0moePEW0fP6)n)GyIwfz`UJn#7sSATM!3671==Zr|^5DE6 z6}q+bhvz-C3ot-7r9qFo9|5)#pC}_np50LKDaM>tQ+^jK%u-__BDZ4V<;SiOGL`cK z4^$28Ii&G-VezTu=gxh22YjuRP*rYy@0)Gom#Bfv)`6+KGU0`bMjml@c5OP>5nl;% zXzy!m6d^k->_WxArm9hIOglnaCv9M9$+qiBB^ByF>O)Dy$+NdfI@t3=UPm@7ZezYW zqpf}?Fb*APL@`KdX4qJ_&GfotIHW#3;27FYZ=`1$Aa)L@?;<}7ZtaU0(Yg5i%)9V* z7)pQ>qh9m+-z)$^DJ9w6XSnyz9rUG}V1fl@7Wyd{W7xk25X%#u({V@NJMOZ?Nk!f< zyY)Q<5Fam|H$rm)1rhJ$zLfBIPE{iq1*Y`dTbGV(&AMFMcJi+SV^_a55A;~wf-eFV 
zl)8uFr7k=hj|5UZ_fE5qbT#ZS8_Ms}7W?XGZn=}qc88lZh2u{BVJK~guY%mC=$Eo^ zOM6J`AGEIV8f?-n7StE_HXY!*OU~oqS#AW!rRK)n{xNNJc~aTvhr|c^Tp*ueD^FX6 z7i(FxS8<`GODYJ*w9w|?m~+_1oIti3mjqE{be5W7YMpi2A;GOZE7*<-c3X_b43GM^ z(xs$f2KF)Wz5a+ISW$e;H@&9i=mz}?!^_DS7RCmUu^yNRd)zdPdt)|^>GsMoqx>98 z=s{OXIbH=9F5AU+e`KMGVK?IlBj2&+lJda`Eh8o#9$qmWFk$+O+&t*94iBy+}q7 znm_(#_Mj<@)K|+!e(BMOnN*B>deQxX(Vjc8XF@+0Ki)}~{d|tu;UK8uK6tEcl636Y ziR}GX*IY3pqo3&8Z%UHHj{=EF?HZMqB&8IBNx!BTX$>4FQIf^d-jFexU3MzrdC+(c z=H*1j7|x2XiQUk}lO2cZy!F3cuBRkBwbxS!e?2*CD%ye`3V~|Qo#gc|+ zMhLQrTb!k~L1<;GP%F=;5kRqPw(4@-a*-Edsz;)D=rKJNl5Dz1c=JVNm!>R^cs5x{ znGIFykrF2u_=UAV!pjn)m14tDR{~;gheMC4hh&-P z6(zp)0Gv)MT+xaQPf`FGm2(GJN%LB>)S~AOMc8ZOak{OBnaA?AdPgQl?#d3>y(sEB zLB(Jn;_sB3f+!(Z;HP!fZo61bXdhU{+Q-y70^v!og=7WOTPzWt7b@evSFmVXYKUYZ zf!y&9FQLAe`+(Zhh8$mx=@RrcJ@4e5O<`^z_Gh)9f2V@`CU^9Po9otQYDX_Db#u}3 z&x%$#FE0*u>wGf0B%j0n>qj)pMVn8A*=C2;T!=%<-8~@8H$<2=+-?_M#KG zAPN^l@uDy{DZc7H)U9H-d(y^9XggWVj-u)`7ay&s;;VC0_uJEu*rtSjOFpY>643QQ z;K4ZXI3`{8+(K8(&HY{CwpFYXz>IU1&2dfMc0)Oc)(~6IqtkG5ipMr$? zr;MaJ#~O)c6oB&M|cwLB(i4_bN!_=hgz40TC5z5>QGzZ z@Pb^%L?>r$(&3Ej#wM0}VXj8WL$+{pRM!j2nAb>Bi)0lM@RKau{VV_4pMHjPHjVXa zrWy1`_Q{7x+k+4HX$5j*#^uAD1FQnG*~Q;owO7Wk2RUcYl2xxLt=E$)i@2KgPp5A( zRLws9O5BKTL+UVOerxW0_am}V`?+8~$@FKhh=JdouWv!tD#ZUtZAhqRysEc2cY1dg zcn=@e<~|1hjNmZWHd~0!gn3rz_;h7o5O8hlZA=Eg?5xI1CtB%SAUb?ulr(3Hen6g5l)Yv63CMvk9V)RjU_9R)2b3-`UXiU07ttGq=s9 zs9eUM?ag3J>`;?8#>udXVrecEqBD(M;82WVo9V#XfY*#gt+#X;*+(;Kd^?-Fy<72V z@_Y8euVxK$Z$>4dSR4qA3!`D&$9MJ)<@^d%bVo;BWE#JKZH$wAczouVb%`@zC6RM3$d?h~;|lLQ%NiQz`1IfFPy}8+(0?{YeG5 z!A?SM`KXl~8m3iT%&fg#i~+-V`Kj=-n?jt(Ytg4z!n>av+iV8CG<0*orS9uz>nnRQz(>O^Sg>qW`n|b4Vu^Im!>i+*2>&)>w`iv~4=cokXa`WqsiU{`B86_;B4@U>t?mJw7%Bv zbT#)}Q0}e+c6n+E^q!`imhUZgJ=#;6Cg#1Pj%@eS+rCAsbh#ImKJcb&s3kz^tC=D< zGt*`11s~<6mvleEpPWFWZSJ$*0CWutv1V2E`HQLO*9PN~CgI;Qvv}I0ckcWWQf->| zm!24Ecsf?>yZqto3QhVKzo_h=xHIP(yLhc`H5pGtzY2VruwVCv>L(kDrA>JW+QrwY zz+|J2e746&eTu$iAXy(}GH<-u&1|P{05E>t5P(*f^Wvy_2sE0Sz`AlP@#2Z+7ICJ! z{mOLh^#mov7JDAB^wzam55xx@3~BBzE5gHS+g^N61(0ORvP@-LJ4q&yj8+W?eTo@N z;hdwIwYZd*Z(fe35`zw?ohc=p%)Eay9iYNYpWLn!)EGUFJlc~spNJTIK4&X8kcgj0 zxf>+4^|YRovxVpS(yRx|Ptdd?jFay?qbS0oneQBjKrL2S;8_LgbgGY|`1a`CY>J1#U--ckykOQ8|Z|PFevA-4?)!nr0BHIeo=`eas*@mZIj9Im5TTAd?*4o9=c_>}x;nB2ST zR9;_ZZRBhJt;}V!C`g(FoxOaL9^+4c_pdRSFP zGao>e-UcP++hLeN1i`-yJXTU<&PNeaTyhtc+iH z+bxkP)+QBXD#Wfnjd$Gd1JF+_2WTA?J|VRK-WwG-A3pQ?^L$A>!E_YSdu^3z)%nmP z)tV(C>eL-7erX%q(0t~YYJr2lEzNJb(!uxEu^xCG3jTiTW&V{96AY$cD`^35R!lQ& z78v&Dgo{A4jh{R*HL*l{LMgqJ0{?Bj@RjM-w)BN2>A3m#5 zwys27yfN+hH3m2#=x}{U{u}k{$35@mSBguwdAh*64(oqdPp%60#J%v87I19KYN!4y zkJz;%Tz&on+c&%5Rg}lBEU}5tzi7%CRP%W%bPfJ+y9PGOTE<}4TX+?e&Z7Xb!+16I zA}%t{hbd69r7&R8-17_XwxfENxiy&|mtu|1N;2p}=r=HDS~aGUdwnqWa|JB8P4=~z z<9wkYLS+hNfh-RiXMb)Prd!@{d8xL};hVAF-dkJIwJ?1;^4DM-FT>#j5AqKU)WlS5 zF?|Rt+~c5*PWK@>zN*2X?m5Vd9bk+ckD>jW;tBV3#%^*MWo2)$ciz0{0_?m5Qu*hQ%)?xl z@S?@sGk&w6Y)UM-Q=a22+2GanhSABH9Y}uAS`r*j8~;SI=l2P;KOHp?SrY>Ic!{g8w|!%29~Q)~PNRHUD}!DeXqFy)mgren$cob! 
z6aqEx{p_5?RM!`}oga%V>FIG#Lr<_RIrdj0>xH-=(4Hl&NRs1gcOOQ6y<t?#HNh^ zG|^hgbdBBp!x5fbHsr^gc}~O5S%p18tK z=v*8`UlNH{swja;Qs-&q9@vi;1kjRd*Y|rGUs2kDAH#VLe3hmC;A?VEJNxkvf+V2s ziV8K#`qmduI}YIKw_2V5Zp=UY!zum`inrGP*&9-4XlBFwXX3t&ll%Z|TxbU|L;%47 z_mtTTU?Hqrm&@bRTp%=q&J0{rJMngl+B3Ot#fGX87_ z%z&3(3!V5@$$lvgy%HmbN1x9gk!U>6+3Mv=#1Ontvfv2{&j9e@Hcvc7kBs?4J>yUs zqVtaR9?mg=8vE-|qH(*-LK@o}G6V#9EL%+34p%k=TAmkB1D`ek>^bv;-l~57bI<2# zAdbdfOhsV;u6?DcKnsfR#|prTU4H(WWc+m0`#&&WoLA4aFfTQxnxB|=>h-VzjTgp{ zUS_0ezi|QL7n^?H7&f?qZva}J;}fxi!w=wlj(fpM44gTK*~BKJya4j^{{RjbMtz8d znZ@A)@YNuA=sjz9u1xH(pQg!05WitLC7B1!v{v>L^y&!U);d3-o?3|~eo00rWfQ!W5Kg2emgY@?B&7U#=YgTMC%Dao|KWF=| zYX=yuzu%trBAd(g{*SNwAJ6N*ui^jyq2Ylz@Q3n89qM%d?-lZ&_vn8Mxemv9ksrdX z@Ii;;zkh?x6-n25M(t=u!+@ncz+){j2|3s{fA<*}{>MC|B4LQg+=wed=xlfc>woo)*zlJ{$C&uD70otqKd^cs;B? zRe_b?4Fe3wElMI!)yf9!L-0xNf!3GZmV!to7V>Vx52ol4)r4=Y>%ik^1NbY7zV!7RYvJe8iE z{NLX5Cy;nGCYwYn^EDj*8X))DKXI@+e(x7#9)XvgrZR@*fdo1*6JUg`ka}HA2;Qf@ z1|BVEz+v3y@4Gg$vxT zvi#8<0H1TIm@xqEJO`_G%8wT+zARD|3{iDMd<6c%mM4>iWgyfY0Y+MtEkMd@xjlCv z&(f%UZsnzGsf6w+4nXJnmfd#KcULa!8=7=%n(`_mxliPY{r`85>z^o21Vm}#$&EgT zF<=F#*%|yF&fYRC%J1zOwkZMW25FE6=@M}0?oJU9X^@Z*>F$>92BoDL8bKP7?huBK zp`Jb7{T$Ez9{=C_g^x4LwXc2dbFa0|wKS|=;~vtbiYty&i2!s2t<(?PIy`A1y#?Fh zkOq?tFXjWnH9jD+)Nus~(2=IqAyf;%D$w=3hqu*1UB5Zma_{!{;_Zj*Xz2wCk1a|> zY{PMK9wIc=HiCPKp*0h@YuW;XL?t*`JSGTKGsw<+Ac}CS``AUYz|K&H{+J^E7+ic* z4p0+eZoPu<4u!*A3N)WkXIu%_J+-EA-AJ*(N6kdkvMcbNSYlI#-On1pMFG`Zrxhfy zIoN_i7U}-Y^lL0crMd<31Dy(Jg{k!&|`CCm^k&f~qOW0xKfk`;WO zOL5b;2kQCb585sgRx?Ec517iT#Is2Q9*~E+xmZ!XSULUpo@h#BAK2PCQ%c#-UjXky zz4{Ub%R5p5u???5y@i!qFf1;}ug;k*Ry{=WFYZQTG@BJn%65(3gP!WNMv|apw`s$A-GbA|GSF(?Sj*h@E;QZ zXevKPEY=*tZ5!A}FQN;Q>ReB2KnG>ZlzbJiD)|qLE;c_ij9MeX_>G z9ulcl#NT=F8tozu#`B{=w(r22w8gdyo!7at6<)I1#JU9{$=NEho>E`@u=8F8WJcXV zE!h9-e_I_DSQh~>?`#WesBF9Sa2;3R#j~ntSP8WSPK1u&w>Kj4Q97TO-?pxVGR+~Z zkn?VU+PSg5WzQzE^wF=Mh9Gx5*J^#q8*H^EgbBQPGe8)%4u@PU-`~DVMW7SF#y;Dq zQHsK=M*Yu>u^;*zI1}~tAyWLxA7GxFg7ZP5*UL0#H){qK{!JeMN7`@&Yg<&=+)TA* zt^X;G)(B3Xc`w8TR*tQu=1gQ#$jM+y$@>V$6{ugEkjUM#7`ToiPWylyX`}__1>1dR zvfoW$Np>>9u|F7<=XjX)5+G;V`xSMW8@xezR=zo2B7rH z!S^A~LEIUy4X#WF$8^UR=mUG;I4#Ys$X1q`7ld!+hSi0s&aZ^A?TcJ=U^W5A`B@b@ zj!sTyZ*ZEL_wwl_q)w$E9O4^xzTCzgX26xc{|ycJ0)lxaU@~!Z{1wp{GD%nLQ7k5{ zCD*p4QP#7fya^AtqoR%c6-6%B#3R?vL69zerCd@|)6!?O#5VUVMYCVHM2{NTXl8$a zHPdgFr2Du^vPkH`*ux1Iw~SNZVeH{zhb$m&d1(B$zs!?dKyk3)=n29K4-aoZ)Tb>Y zqUoZrniLI9oo>qC!yq=#Bsy|2JpW`B7-DL541#l5eJ$=KQ53M&=jOdR8F%#$xoM~P z%CMIr@_oRKIIMT!pA;Cg$WM!JxMZI%CFn?c-lw^{4qWPcOrZY^eGkuLz}^Qwo|)sl za1h9g>)q_AdGfs;uF`{bw4BgBU}uQMM!aBGmgwz#0Xk{A9iz4rR55D97)s)dhGx!{ zKG5V^kmc1{RMzl(-L3deH~!=t4Jd$94Ken;)%a2hdXzFGo?Q2QB)T00i;nCWYP3gg z-+rI^Hgt*oLR!>4C;}6x-ZlZI`xv}6O-0%fU~3`XJL&)C36ha7I^DY z9BO*SF0cD(IWnxyfZ3#Vm$B0Uf_$H4u60eT^L~FSihW=noMH8G{ns1OZ1K9W; zqI+z!ixZ~6R+eR|i@WGrw8VgO%h8*c!w0mdT5iXoj_VfibA%W3I`1Jh>&*tZml?pL zgx&IosDw^;L7KE&ok8VGLbL^ydN3c!2D!;IOwNoLZ`SOS6WThZ<(D{iq`~q#zz5Q~~RpjeI5~ z=KZay>Y5fNF|F0y8B5?Pu|~10sQnN+klB15>K_mgSI4qy;;;=Qf2?C?B%>H;GTL-q zpRqOl{i{s<&G8lZ8$LoBI>nQqAipFCFUp&`wTgxs<^U?3?k4Z-X=x`hjwbA7%hf#W z!*1C(MU$3oM7V({na}m%KB$_WmuW%XM~ZOm#b3a?+TA&x<=W&h#(KGV%-n=F%5Ea6 z4jIiqh}^?+ZwmptB;r$ABVjyf2Hw^IwX9`kV%pmsrZlHyrJuCRbA~_W({18ee*Rfe zwH1&rO*zY=`KwA-Mz4E}(m#N;%=$xa6aHb9KEqy&TmG=Bw{adySfKWXby!sa%@kJg zc?|@P%BwTctvPPt6c9nzLFvi@BQ>V$C1cVpfLuI7npLhg=-Y2FV? 
ztwEa-bu7z4mb^r1j)hs8h%n~F>IH~}s%}!QPj9DsXdmX(jDOW9sYbJ6l+s|;q$<+@ z$T;{}393C%nan=!^=F#*B*L^Qu4Z}#da$m!m&tYwt;?#Pbc9t`lJEr3-(l{igh#{+ zTA}virC%pH>b;49go*@us|1%ObZ|?Wl7+RVTv$DX2AqoS+%tQ*7g_Dk6e^JuPNM{Nyv#_!LNgQWfh#Yp)Crk&wmH6Z2II?mILO}~c`7Oy=# z`TDBsbM-U)^NWZ8u}Yy`aLfIYlbLrXtFsqh)zH`2*P%Smqxa~m&p+w1-6+|~XT7^p zpJ@KLo-|e2HP=2oFjv1e;b+Yw-F*vPPY@Wm7y&J3un6?#sBhsimoGnaQ!@+qp9A~#=Ig1Rhlv+7wrw4P1GA{x{jMP4@I}A=oQr>+ zlJ2bl?~TA^SvfuyhMe5&QF;ZJw_01(t3v8sdf35o!V|goW>fA(NVlQ!%Oz$nlfB)X#uMe97LnJ$2@%mBGTX|29Ro@VEat$eb}U z7tmnt^9_oR(rRFiJ4f9x7t*fH#jtw7#K>D#VS36gX^N$Ey$`JMsSWSfbd|PMxIvjj zU7F)43@_{Dk-|(AiWSqbgqBjgdzeHL`aR4GV<}p9;r@jt6nX>UbqC6T3C#5wj(OC!9vl?=DUZT-rW0)d=V)G!8uiy8{Ku10X!`8?bpi3iuG8dHNBil+1Lm|P1-wH9 zrpG=IgD~`76IQ>sO%G##p>+^rS5Zucd8COh)l6GE>!V*ijSm#MR^}$c0_F99*?>OFC%{!P-SS! z=9aq+s^pGIEvt?#f|g#=%WfB90BI&&fT$#$m7sLn;J$0dYdWf}IM%h&WculD9hdKO z^Jyi+I4DOi+*wFJT_@>ea$kS^cYNv#5KKz5NA_VRd#Jv?h@ldVR~<@HSDjDIl_L83 ztT^ZW+=n|-vz9c{q8rRmDnq<(V>{ZKvM>f{jxss+&G){iHPgyg`0K(}y$h!1gw1C# zM8h9$HkBBHTQiFVwsq3YQXw9l3kj{aYGoUq!((4o* zrj)JWbXqqNh9G=7k!}S>k6PM@YusR!dCZF;8WmOLKMiX*&k&b5PQ{xnyu<6$X1H3u zMf}@*&l>J-S>J?0#6>%{TUxIPy_^K&Y6g4IZrs%c_i%PUb9ca3Aj&T*m_RLfN@#$~ ztG)&kgGySkJHY{c+9=2Xj+P(?=5X$gNF38oh+-uTwDhDQ{UF}#e_x{idld=Q0YY6V z*KfQXyjx#0iTUfxQv)wACY22a4U+`96->Mz{s!e+KM}8N;?~=i(om}K1KnP{Qn@wJ zeB?mxmmT!#>O&pEYu_q3(=-#4!9hxBaHwZT7o=$b?-v}hVlrC_!^WE`_6F|!_^6uj z)OpK|z!Ta%q4%o*Dck5n;2&7=rYfz{F`9I70>1{kXO%+(6|kbLAD?Oz7B}ZE5ytkg zUia9|YqlPz!<^lgOC)8O@|S#Lzg$?UVO8$ng_t&eOFA-_>UnotgjksGb*zEd7n(sX+u?A>CrqkN1^l33=XqXP>q|m^(M98BgLsqkVx_Fr`c7_M! z;9aHGM>49PHx<@eF&pCG{7=yM{~NGHti=;N0H(>sKTWGd+<^xpM1S3TZL-N?=Pq|h zAFAt#Rv9}^l2UBl2-U^aO0yaZWnrYfk9*=@$b)xSB~E{ch+<4FkK6=^f7MA)LVcYy zZcaNLguxd%D!Wq;W8cI>wN;^_0OHFfsU&pxx?hmwI2EjmXBNAk25*18F*7UbGi_)J z@iqWkNJh1u%4T#U&_St&DK47wXOBe2c%7XjAedyE?2Ief+IXc*0)=w5cL#2e12mDrA;HK99 z7LL~paBAifOP9Nwb8DE+2!oTa#Jp%lFgaSz)3&qP$_UHH7>t#ms+*|CYDdJwvNeWO zGMJhP=^#SomVx4$WrkTr_eB7vSo65HN_g8kzI#O%xw3BdSHWLsLo4wTLE^CgI;z)P z+t^vC&T0yEYitfvjbOkE<7{LQMqKTv8Q5gf+TjV$%u<>QBi5s9MzmW>uwkHaQ#<-h z`?FvI6VhD8f(+|ouS2fnwC_6ye^05Kp6x+L9qXM$=;i6Z5Y&fHmQO*`mESiE1(;bSyuS zw6KPbcUa?+255#4>K6!be@^`=%St+chu7;TdWu^mYjz6q-h11E%Wv+*C>xX_l!lQC z<1pyKE4`ZKK$676oS#_K5hH+gom{3xI*$y##;iV@RG?G|8Vz+r>4+w@DBNkimLPADm91%x* zBzWs2!?eQP(HI#Ux^W7UTqu@PEpE<1bfm_|ZMUA1%y@zut)jf}O)xH2dxE2uJPQv0 z0a*0`VtnjUSY2f7Mrr08H5c$7Rz$4|5FzF-vI{EGiZKl7%5-KB7a+uRCs=pFJwSA9 z50K(lpdOTo2Q{7P+I`e_QNm~1AtYLco63O4bI}`MR}Ze}(h>!vP-1mH#_+6NIzod5 z_73+VcyC^hYUm|SIPACG-I@#As$nS^@ke|$((@PR^y&nxh|$r!!tliQyWKR7e7n8r z3=rS>e;*$I+cD$oO$D%VloJSwHP8ly!`%QkBnM&PIgv1rMqo8_mTUYpDyGlh7X`a^ z<7vg4&?}_kK4I}TE0@Pbnbm~iluAJKUn4I|jbFRHkKn)c+$pyz)D3`X|=mgkX6x9~-GLMf-LPM(EI$ar_RAU(dr+dRy z2z)>nmyz29OToS)$Aq;7q;{HnYJVw}bBR}Xo`aVt_A8a#VC_%o*|A(gQFX&>s0`$> zqFNQkR%XYel~KV4q#~CsJ7Sq?%X!4GN8kg-P)4WaW*KQ?0d-Sr{d~CE{ueqS0 zqcnNAb~LO_?M^_e5%2rF_~Q||Pc~<5cxgBVTgyr8+wv)=3aX3^g3<_uVv0~Vwcj(U z1RsknG~m5+w>mr4YW&KZHelqB`M07*iVW_3HS>z~c4hq7nlx?bIMR~_@n)M9`!M4$ zy@1!O`9{3|=(s_U`Ev1*@8Q|<2Oyax{D@ZTFho@xECj%VlF}-)u~220(Xd3+p%U(5 zcB+Az4JUZc{&sp;N=&Mxj(wDryVUv`beBD#&N&0g*9Rz>?!Xv+SNj%dgkaJl|F)-i z7W?T*+BD)N!P+c$MmWU#RT0w81pI9Y++lBjDeFZK)V{@AnS(7dq}>90AK9-R2{o#3 zvnK^3&`xRImkOws^(mxqLfS$_tZAohJ-Z)FtEi4H14_Ya_bmsl;EsTn2bwL1C<02j zyaA?0DQ^pv_b0vV!S4V{p9GU?e7rN9*&H09n&Mz7-6pawIcfct7w+_#W)OI)ZAIKZ zq_J5Pa8>55;3_?5FA-BV%(ZAL8>;OTXAjh`9feSmLvSz-uc13N#fQUUsaa4K--%NsK- zJ)5{69m-gkk2zEdx-b*Nl*dfC5_X@`lG+vpBgw%BIB+|a?};}CpsW09kLNQDb<-vl ztCqbB@u`p=hq=?)QkMUbBrF@qDtn!QxU*v{n8`9=E8*~} zBk=bQwzTzl!O*0fcxf-b^?0=dk;!MM@@rloP?Y2^e*ps98?lZezd}2ZDDk 
zA2Wo={1%saWUg9GBcqptA?@P`;)Tzl3QV2Y6p3e5ti48TkX8ZN7pXDeWb=pkiXREP zAFbs?9>6Xww(A*_=p3_# z*;Y&9+3@$P)_bfa;F(i76H_ zLTfGtRQKLjtS3?G;?uk}5# z7&1SODIHw{uI*Aa*^@$tVPUllzmlUstJp_99lLE`4W8@*I0KE2@X>HuzXO<0FgloV z!{4N5fU_R~cVMTnD_llUNy>K6BDtb>`eBO9;7RNyZ7S=CyP+H4>Ht!s&A%gT@n$6* znlBP~y|c)$Hd#A{|6}W3%WEv9O^(y>2WsKUtgp78<0-aPf-UzoO7p|mvf6Yzs!`4t zmGxOk6h9Jc*gTknkuQoby7A|cuSo%r?7acj5C~%1qyQjm4BczhS#k>+Y<9nmT3#f zDh!jbn?T-L?tnc*p9zbc5%<|zoKj?H2)eP2h5`@OG{~&(RE^OW>wo($yeg|6Fo;pJ z!RFLPdK3Uq`zHB#5n#Ad0dXg0teUdoROrhpK_fveV_BH_ug(G;u+FCLy ztos6n%y0Jq39Uj}Bbrf5MV?q}YW^xCm7}%H zH&81cOg;OEwZ}eX7#wu29}r@m^}_$blYtr&_dmd)*1#_;7S&6htaCPIO-moQzys^% z69b_PpZ)<9o>F51A0iyZhelyUO^zZC;5Ej#H?bG(Tm^GS<#t}BU8yEzKTtFU=50r2 z-!WL_{tPrVXMVcgqcOfJrcH@Z^GY!sS24E&BwJFaZ~0~`H;VPx6Uf0_p_RR*$&O8K zJ&sv*&xlQf9R<8BL-U67b~Ik-Uy#qy?13OYnropQR{)Ex>*Dg6z1G7v6qozfB~k}H z`hAifFE%eyVRiR~32DdE(G3y)y9X@a2EK4cVxfLeb%nCE6|>FnghTTyD|=9l3_2gA zGqlRWSH>2N=Lt$HDbqppj{#meN|Hcy*6P@`S?$C>y3RX4D)1F+D}EdQs@>@OX0vMY zu`H!6;AB5AmDj`~BxcyHtCa|*_xo?Vws{4?y&7AfkaI=Id*{qn%G+Ly1J#Pcu-+R- zZ;6@@@DNL;wm=`B2&0%=AOYXSH*_=M{qb^K9V2sfl!;ov#~}Mymh3Nkaa)k-4VP8f zSTSV8TJN`;5wu$_bz7|@a=z6S+^igMUT{o04)wDI?VdA8^sYpW@g}uf;HJMNR@|&q z{$Qxn^Mk$Xscj4d-yw{xwE;ml*VGs9r*P(^`?a*q{`yP6F0^19ls9`ke(4G_$OU~^ z%F{sP?mQwW5z1v~Q-J@C#qA6qJ6)^D9#|fJdRPkL0f)hVE1zI?CTjxG_Mx@$<=@MD zN0RiKfeYT(xRE+o4!8#d)gG^8wvT#u3%B3IQzP|S1G+q}jqSRaS8eX+vi^`?9|L50 z!E?d0F$X+6ll^;YAB`16&|bRM8Dozo{6Oq|q1xb#YQ9jUOVk&>ecY|Bn)X&rs33p$zc>lY~Yy;VCx6U>IPm_V_ z0RQMqeb2PeR}NGwvq8f9)}t6N9{_ny%CXin0-*)_E_7QUNhY%Nj^O{7TmJhJ_W+PI zvLnWXd{IgvXKk-QHTPd)(sq9i^AKM~R5R!^LcT|TMKFNv=GhoDikc&k(~ck_&-tWQ zAkSFlV$y+K8t5n4#aE~8MNy;s((~FFop`WWTe>?m;~y2kr+;;2@ng>p`c7LwMCxlZ zMNyCr*7$D?8PsI;Z$bm zM*f5T4BU~JS9HiMQG5$CJO;=BG8AF1yh*;7? z$MV{#uhukz%hg8ze(y$qR#lyU-x44C4{$;14keS)B%Wb|1ePe6(NH32C0^M zf;-3(P=YvI|1Fn^I&V3&dr*9VTkq%%LLCJ}wti_KH@Amza#?xG`cLr8B`JTx91g*4fxH`s4PV>X*R(9s`~7nMsm~9wKd$Lk z!;Jyp3Hu5l3*B8G2e@v?52c4c^)-RKin^@3i~Havb+_Q6 z`uzX#D4!BPcvuTa8KZvgU)`8XvJoku#LlFfn2tM+8kG>l~qyo~zSn8q<Vi=CfP<*O zx>-R)vD|A;dpd{-pCj?>aU~d^hn`jT{j}kr4 z!|qbaggwm5%v;ioWNKw3m1o?_LA$?UT7zZ@P6%g!J+>Qj@ImLoUG+#zi@s`0jfQx91b4Q0YqZyWp7i;I zF$r0ZZOQKA-T}GV29@Cy&G&M-x(qizIryF0d>_}_{^sJo* z;K_9h1=*KYwtB4dj{}N)#TzUv!R{&;TZl)n+Ni%S5M)i7Ll2Q!_kJE`z9NVt%EfT^ zKSXQv0JxB|(+_Sf&OcdRgn2G^VqHBK9RH)NSYq98Q{nu10#XUd6)I$XfEvtaq#1zZ zi;SxtAlXO0S{`^U#}&!W4wtFIJCJ#nj`Hre1Mo6jaroz z3gi7H@EM`6GAAaZ_|w}5)Kc1}oz23=PY1m?3qcq*mUrXYeLN8t*3 z5L}m*f$LZHgu4RN+ckTtg`FPwx3^JAnm)u8Z^=&PF6qRqL zmzC}g(q2(h+-yk+UI#Ug+qpBX3uv!zdWW-g`lL5bz241aB3L4HYX*VlgYXT^q6jfm z9IgQYn{t^uAr2L?^j*Ymr2fl22r%EE+p=96JHJ0GFhV8;GI?{N3_x_6^+@WO$!QS9 zhd(_6#wN=mZyyW1#+okXl87(Y)H#(^G8eT+h`*ubS08*nWv%{xeoYwGSh3a^c`byj z{c?R7gpW(~lHb2PM?DkHHEVq=`8nwW;_`uIcgjx`)W-eh4JeqQfwbgPLkQ))rJK(T zKT%}WP;QIkZ{%zAhv)-MckIUILjO5O^0(BzlxpY36cC{_mylOtRvppcnRR@$SKqwO z(w%3P!?c2MBv@bDy;zN+(7*yi7`QauVW8!0f$lkFUg^)N_2^kK0i`Q3)I4E0r$9of<7IrDcWoh*b9@c0a7$0!W_*tjO#t{bv z_LYeilq6APUo8`^?#q9g^&-MNy?8(Z#|MZ@ZFSn-WYDk`BstC3`YWs8Lgkc8lr`&1Z2vpfOEBz}jw%A}RG0^A~D z!rvQlaTyfB2@rxCz7bf9MF=f%h|^5^h0HccfZQyHR3*c6JwdA{{Isqoku<+VOh<}L zX|A{IRS?6<7Xhx*|Zm~_3U;+s<{AB}P3M1qUb?@QbaXwof5e2Msne7~TklyOXS?*N3} zCRuG3>X!SQ(eQokq~^MRlE)lR|Dze)LrH2qEiAUeOu}@JZ@e0_y5ExNC5yZnSE}#R zg=v8#^M2^P?*aO{UUv7Ps46wJL|T-^z6YF%tETfDEfGIZeD*9U+Yoa-JE1UbT_{O` zNJU6aul7_*ML^=^;Q&)5so1RqrJX+R+9odXiaYxbJK^ zKo$aIcnaQ#^a|~P8}NScIT%Se+=6~@-kzT6+`w~EIH;6GtXk-x_5~ND`Ant!_Z0AY z*mDy8!m&ew0fcRfPc7)^_@-}B2OQsu7}#0{Izfqk;gx$ z-p*avr)3UkM}#T#obcctmQZnmPw$%OLRj_edI~hva;@B%#Nq4bh8wQ!chS`v4Vq!f`4P 
zym~xZZvYD7kL%wJL*2gC{b-IX?d3KA^C#v)4BOc>h1)@>)4&L|s4ZKb#IoL*B4T-vdgmG9;%;~l!R_|?!`5a=fTI4uoqyfR7 zkFxW#-9RgrV)%mZ0Q$(3dt6YTo@tNQ*6)Lw#`(!3tE3D_xrdFsRO5Sr{OJ!E&B9n; zZHu~{)o=wmYXF0;DoHg0RrSQB!mD$@+cp8E^yH@KGQ2elT^U z{l)5)RvUaWhg1@>X4GD|sLU`%=OyaPa&bnt5Oz|S@-|hwYVA9a5I0I-3fEjH&Szuf z_pxCMw3!$gOa8k2Az$DA01emuOe?ez(`!qj8^+#TgV)xvbGsd zY_GLSxhkFfVkBfgt?v|}XPF&<`sEtW4RWbw`^?1xbpAx$y#VL^qX@WkLEBBSp?&cnfc>Nn@QmBIjp>ofKzfKlr0p6Pu1(ese@l7bn;Uo7ak72HWQMb72%ej zx)Sw4lHwbnyae0Cw%(?@OzBo3nlW22w94Fh0h#<Bz>?!_UOhLKh7$uSVAwB=T2E$08I06ok;HN|DHp2v2++Gv!AQGM(5USdS(FgNb=SY8IM9J8`*YBjo4Y?NN$JI7>$#fdgUhs zAC2{M+w#h`@A{Bz5(Pr^*I0}iX%n?`2#s2DA`p%UQV@w5Kvgcz12m-DZI$I4-X3vE zD38)S8l$WJ)77{|mmX)7u7+zw%JfBur-adDSo<}C0cNo4GhCorEIL2xWc&L4?)>i7 z=n%)VSmJ~PIK^sEwyE@x*wm;f@yoB^)W4&U(&57YnORXgrMnMC%$dgMMB{NdquW$_ zRgAbPDMCCW!sXMYW9q{IcHcc9bq)Lt81yg)UVQE&E=OuFQYX1LR7dyU z4he2io85RYv##;=AidFE$w6nWolu3`Nf_Y_YsDlb>O1yRl>if#E_cXSq~-Dtqi?!H z9T)BTvF`E8B+Xo~qp6focgC;jCrN$7L*0EiPId9jRAy8+Vso~HkG}tRUHXzVAc3Lwf<#H zX6+v(!(=tpk!(A!T|F{R7RQS4y8xt9Or1}cF++mwPxvGGk#>>hi$X}JH)iD(z3e)_ z_vC=^-$)73aV^SSad10nL(GJ3F&N4_cIFN}H;VC+i;6PB{X*9%bBp#7Nc(eQ*6< zD@@%4+||K>;c$}Ne7rEtKtkIKsKB!$)|+lb*i7W}o9IlZ%`k>PDWs&QandZE)|7QvpsKZ#l!Ya2YBm z@I->?-)29=*-@QZ(faOvzo_bjBh(j)CSND#R{1IV;B;|XzjqxEL+%b+@%tLBnPdh(-e$sE(+;U+Y7;1}cUuevdYaybx3AqB#3BP6C+EXRJ0UWrb`NO;7}83n! z8jcz>kq|53lj_hGqp~5$oI3;a!w`^0V$1B>rR=KGm)pjpkiA>>TU38>c>(U@oGWHrFedGORfTdZRNMDaF}^GH zu@zt~xF~^VTq3(~%$nI`%dNh0&#%~=bWFv_%}n`C>6+JPClE;r25nXTxCjm5ytLx~ z$+WVq6!0g%SN&+4cVFOo?MN}bhckXdC8HY7u&8>Bzd?FZiB0F+(USHhNpDfG{Xn$j z-wKdc58fM>O7)cZ!bVdC=_L2yr0adUe{*HBV7Wr8WaOo#u(u&yR9&u8z5oMyX!^Nx zbdLIZ)?AIJEhw-DuRu5s6st+qhY=0_nGpT+pGEa25$jS@Q(M1Fo*kw1nH3A{ViIbF zl$@wNb`?7R=94JIn_geTp*8tcdnlyXwj9v^i;3@vlr3A<8M~&5*&^RV7Y0T_& z1yuZSc2QQLB~6u~Vl{yM@!9N(_pRbV3e? zL8hj>vv1-QWZUwBIMJs=vVJn+5FS0FP9c=leZNo@-JN;c10$@Ck<{+2z7LtbJsAzI zhbkXI&yzpP+NM~CFX07H4hR+K6THxfA^27GSrp$xC;nB6+EE@y$a;&F=&}-4;!Gay zo8+fxw+Hy8$COGMzjX;K`t9t5i-sQZaqvssf6z^A*PjQChEOQ1b?GfIbg9}QBb~v8 z&7avCV=LLFk4}?+V!PR0>yHy0OLWBC4PThc5`Co-+s(F`h3jT=Jb}$lr}iV zr$EhTE_MY{sEG;UAS*n~aqBw?|R3~M0SX=2;{>SJd&@(&aR&wXQd2gpx zIK#&fy(k3+j9oaZ!?IRek_G;%#6U6E4LLzqbDv%O!`d;7Q)40N!M$ofHVyYIrRbwb zn-_U2#R7HgZi6ms{xf90GASqFb?#3UWYaUcpc;myes>+k6W#L6F zS#-NT~KGBRd(!uuvZddtV9MI1k`#Q|9n&fDtx9sV7(Gr+LS)(a_&t{E**_if2{ z9%={6X+09A!JOZ|4-Jg2D6hIeZio7&R!07?rV3QzZ2UCVJH6W1A?1|J>bST|BDUZ| z>53xZd}f5}oqSu)?-$#0S5u2_1i5pKFC2FGtI4Idc?qxHuo9^S4;FLv{q#`4B;9h= zi8Bq4cMORL<6z!t9#1}IUft0i(OXif6wU^IB**J4pyEEfoO@I8`?WXSF|hykhZSDS z!znd1pk_kR9@(4iWmn)`w!!r&i+MK|^s8udS%AffEJU;xmbE%QvViIn$Fi zaL#$2V&kz<2Pi{xkM0e8I*E2IeRh;{;5sHlAAvfCd8sp^u!K&!J48S$x}v$4NqY=l zk^s#ACd&d=C+sgn-Kd=N_3=?kq8FM!8UxvB^$c?})t^9AtlM|BM}lD<41{$MX!iz& z&vFkzSjtg^!ej|FCv=Ve%TR#BR=_*_(d$}BK7mh!X``Ej=NIwuJc3ZHpIzhE?@!Pu zmQAY-|KQOI`?S2@Hd4gOu3;o0e7}KKeL;;Tl($@Y9H$g8nDg908h#tv8VReWV7?yk zk3f--HHoi^kANy9gMR+4di<9SB^{M`=U_h+5-YUJ-H!tem*lH{0iL*!c1jxI>@Y^k zjd=b`+yw_6Tr5*H5%sgI+jnE3h40HO=PX^9SQ|7A8cJuCwgSB3#Z2>*Sl~=ZriKCw z0DUC8PjYSCrLx-fKv|o&D)Lp@xfZ8OURHuryMf)=sOE`3Z$Va^n#GdLG!ed+e_!0f z%j`d00un7*t-JQvvDL9K9k8DTV`1zbD)OyiAkO&5citGbOjLqyQ3EAo5Qv{36$;d5nG&Tu) z!AL;OXHvAJ>O($P`4KB?{xk;@eoNY)_MYfBMv>EiaXWtgjkbAVR0}H;~sV%05q#^lZ8@g-doP+wV@Snc(wS+=Gg7Ueq~{zQvTpuDlfG% zbZVxMX7LN{^hx|-X^PVKlpILr3)sU!F4|zjE-NyTW!XQALz1Qrauv266G6CX#M&IO z$i7(<Z*`YAY?^Npjkvs5jm$eN!|pD(42$M?G_t@u#RU@GqG3 zX{D?2ayIT&i z!Teo!+<51%=0!LAvKpNwu5}{s}mkHR*D_TS`a=za zb9y%7Z#+{8B}=)}kWS%lZ8u@0SCPoY6i45}A5NgLmkAn8oR-+$2?ej3YSW$l{x5Kx z?#TnS2r9#(_=Fd&0-rw+N~J%hVOc6rtn9b)cr<7`O%~vJvRc289Ly4GnQO$ zZ;IWf1{{_HiAf$%S@!ZE1-~j=T#AaorEHmDBB5uPmG*?cLzXchc 
z=*&9sE2S5&#*vwY#WSkO8kW9D+&SbrYs)*A*;I__wM&1T zTiCk?D(dCLQ454oEdJk=pEp#c5Uc9B(7nC%6`-k?GNN| zk%wn$^OZSC&oKtHtVW5pp7?3VDtL^>C=5l{jM@CD5h59m6`5WM$g6yeiI;!K6!!jv zm*LgA>Lh$Y^R8@H2(6hxMD|xq;l$*8qgj(;y{+E(Y9x8@%QIaQoi@zO-4`A|>+)Ok zdW3lDx-ko9&2Vi$f1S|_nAwVHDE`PX_Z5)cS3!_xZ8bmTDvCCux#<0D9$v$Y zXIm9;yL;l`pTODGjcp{Q6mDzG?n@_b3o5^ECm{i4anvLK2wA0`@oYum@k)n7-6+ zm5!>Y2kW3ip5W<3xQVCK?7=2)34w0`njDB0LpJ0!Y6FE=7}ysQDeP(U{a(teDyIZdqJw>dEAd=`j48Ce)9Dzag=+{`Xah%ad+~lqj{c;;z{Fv4Kmv zQ}p`GWqn?) z!rVy}hr?eTGRP=*IWB9#5m3v1<9*W#P}IR|b(QDOmMNZ^;_%s^M_cQ@v@-uZlVEbu zU6Q<1I%vRlCi}(Xz6KL=Fm_(uenDlL85JoLZ&_u(e?q?k@7VOasw&*y%%jVftd}6B z)*LwNTci*Mrtz#0bR&hdCDGE9(SE(x^BQTL=k}!bZ8jw53#SFO!9`9*x3ryYkc(_Q zN;{wWGa6PNQkq*_9+OnC?J7>=E+htVbQxcKAm)$1x3Qy`oZYaJB5UzpTcJ5oqo;yLmnXfAs3N)DF3iw322?33em5DJyN> zEB1-Mt@loHuB9?$us|`=eUd-2E9nB~TZ!S!^*SX^G8@p)T|idynYNPqyRpJg=;sG4 z%6cFM-ulb5m(gFJC3$~rmfqy9k=6`AVlj%zd=W$xTt7VX*+9SptU92apKiaq`LB=_y!zm5; zSL@*n*{1;;cSF0f$#gaxDV$%T77OC3O_A#ZBVcUP*ak8oQcDfRqi%&N0Nobztk zj~bOlnR>*YYxudJI>EhyWlE+nYsUu3^&YtGwA+%+TvX^m>FTrFt}NAtt_nzWpaiTM z20>^d;`NZuz0XIez6W@?-6jxGZUp{AAZ&Paz1{y1V+3R-%zZ#O>E0B&^ zY**D#uZ^2z3GCj+PJ@z#fuZaD3&?8+i;{QIHay6TcEsP4CAT-trS-j3ShFBPTB`5& zR5{YA_aUEfsAhis@xMlbd{F}0e=@94p45rCJlbP#DM6F=SeEziF|=hXC8uOtz0}KW zDlz>VHUEDoJL|Y8x3yt!MNtH#%ORw@6{HoUB}F=6addDgnuy5qVE(a7{j(hiL98)=8NoUFI;O4sD!WruX( zt#{WXwS9@&XU>VSM_Q(mOdib>79@X_-zIjNjSIl52ze0*T=N0>L%di_T%_gAu`h6R z)}mwU`U0d=W2WUKN1ib`bb1d?OvMmJBg*Yl!`&oDIOj?Ko(Nb|vcwyvN{z*C0c6&kfs`zepPmX%$ndqSz7uW-m}6IKjF zG^i!+Qhqez70vl4rVd(H)}QvzeF_Z;qV{*q?c`>xQtbbLe3tyg7Ib;-q50 z2TS29QKF(>z!7JIos{=Xmqj3VtLs$N@&K&1sUya~SEu3=64f6WQH=o|>c(L+1g&a~=%bgnHa3ie|DI=^TUDXNH8wDLBZn~TZ-Nc3mx&th)=9_yMx69TWict1^ z1)J!<79W$TuHa}Gytqz=P+bS^;h^8%A0-L(6=*hjl^mKblF$)pRt2u2uDY?XB!-lL~>a zL$&OZ&Pwb=1hitT#;9Da)I9^HX2Rfx7cyrluU^AM{j8yaoew=G}zUZwxp;->di8>+kA&aKG_Xt+`dwio;}uWTKylvz?!uic_ul5jcD|20LT?70iX zgo0CS3U`^bqKh_hUhuF^9qC-k8zeiUe2VT)#bW)I(%ZUM5sQ0eBCkH$IkBFf+2vJ6 zM)fjmq*z)ODLdt0H)q7XmH*--x1rZW1xI54IZToDjr@57gtoroU$>=1ifC{lqE$g4 z9IL~`^CGZr0n@0G?4}+OMMUZqFR{(Mh-#+X^o4O-`~5(Z#kxvjW`3ps1)1ATrCwJc zmrN(Bijnu(00ZdUZZkSYrekHhpT>HzRMcsjbZTRKllbmoF~Zh+(ad?g<9B$Myw?QO z4uP=ZT#PHo6Ay@UVEZY$ck#NGFYUp0UKMeeI-Z~Kc!=Bqn36&GEE*n?5l-5G=&EVeSBX#`gg<9+h>aw@vE9KhB1WGWh&4|0EzE zi$c0{9O&Nfmu1lIS&=C^%oefkzHwC$o=y0N?i~mcqWM}N<9j2o?K_Z*DzF2_cd#6_ zi!WZQCOVq~_cOL=9is$Ma?+GH>*9!?!Ut2%htggLVglkI7J@6`%%@!?I9Z1Cqy@u$ z{nj?3@-8dC{a-3SjNo?-)(^b&r9TQZ;lgf`tF`DBwl=D*etNl8igC|k2Sp5Ft z;o-27`FDYpPw%odyKVVS;^2O0=FE!>IE!X}%$cyln*1uV&$=gL|zKZ1>X4VR0l&kbwS<6^tss1eS1)?=Q_ zW6}CzMu8|0ql~BLCM)TIJpFLJ znw)^%E>dNR;&S5~lR&kJg5~JE$II^A$UY0!%fk&pSD#27|FC8Hw}m8q8(hK)`W$>% zzJNhzy7$0Vc{DgLp_oCGOCeCZqw8i`K>QOY)07Rg+h+3)vlp^-EL6;eP}>3B#jSaz zK8{G4t10VvnfUEVqy2{#*W9KsYKUsAqorP$rnZz+Wtq(><3;qy+@F@TT;aUZabJ1E zBHl@D@yj==vCv%5@}<+aRhyj;mSz>_+q>&Mx(WTJ8uiifPrN4XT*}j=>)xG21;4pz z)l<$!s+Pv$^{5O=3g5o4FR$B*e<0BmSaG40tTj<$E%cP04|XJ{_q;UDceYZxH&oQ0 z6mwo>>A_m%hua1Y zm~low(J z5wI4@&qA|4*-r@PUa;D)kvqhB-EL=%6BbQ6NYa?;+HZ?&EKiW;J_Bm?()oRtQqdB& z#mZ|Y@=(~MUhDmD9Rx~MOw!u-YkTF`#{vb(doF`_^JwTJpA|xf$%KbVf5?1ZCkA-d>e@duUHCRql_c2j75=?KN zE-8b3K24Qksp<=$^|*mu;?0qvsP}LV^gFo{d+)dQcdIfjb?*TfWkgsynO6th;7jGy z&2Is!#tP)d@k(f%t3C_s`u@DvMD|wlm)Sq_vc79cnbfMX;}vko%Qt~yNPsLa*5=dm zT`)ACJS?vz(D8+odLt%5w*FdGue!4{vLHh^7lpf-lb20$-}S?Y*5l7C_-q-3hTPpA zC0e?y4L>IeI*GXU{0(dfv@RmWR)Ie7bo#D8-VA2?{ERT^l;s4iK#2}Kv4wz}6zk=9 zURymlBkq+7nwskeTYirG$IWc4Fl~7wG)AQ?c2%?fM&{uf5&9;y#l#r&`((qLaS9^sPlilxeLA%DD%3X$Ew zsge%sR0%=W|)6L!nZAQPDCdc~Y0Li!{=IMuak^oUK z{X7Qu=I2u9kY@)IW^YE2NBpqcP%|Z{5BgnI&AG8_Q6*UxW48(rx;6Z7c@dJH26F2& 
z13m6hg-PTy+|WCUAKsZ3g#^=Bm&~41gtCKs#g0{+t}HnlIyw)|s37*A~g@aiJ?(j_1g*+p;BA$?8zClG0xIXze%! z@voN*2kW1QLVy3uGK$+x(Q~tt>D+jKd$9gfOp-rB?^tDz*~gcg;t{Fgik=%{gok=^ zwdOJN#$#uxurw$?qH})TbdUr^CR4y$Q00pCwE_I3P6fOA^|X|{S*1*T2~#15;t$9h z*xwOA=eNTsv055>q~{w&qW4N#RE+DMW;UZ>g3dDR-gerOn0=UbA-*50&J&WPlk<^7Cz)G(Q=&=^slbVxUF3jMYIt$ZT?wE;F)??eI zr;f(JLQ0*Ry~Rjj-6QM*Fual;fGki5}X;e?qU@bvn!|DzjMy7?f#Zy zY7pma%g3dl+vG4zF;)Sl3;DRUv+X3o!B1>Urm5A6k%&r$J-TqiCmz_@@&+AEQG8la zp;`8KG#Pvb)anh+*Vq`s$MQ&`o)x42Djb-wNu0N8Ml^%oy`#b?OIDcECM9BoVExcb zr(23vdLZ3ZXizmdErPRkbipxf9P8I7hUudjx$(V;?JPHu;naF7>J3q-bl}}@f<$6) zV$-hn%1s5Q0H9M$8<>+fQDd052K>l&V#&-2z7CLEXU|d)O!kO`4cwvStxr5B8q!-U zS%x{g{?w@buZ1<=1Pit6yB0biMT#tICT20d1yRY&tC*jak!6;8y6?ZPUuEZW0Vm{` zNRd`Ue7*FJc0;LWG9grdV562jK)#V&q9wkqBpK%cwvi>(V^YI*BJrXr*0WmH9pN3{ z!(lpyN2&F4(*+>;-vB5XGfiIie=ALO=S6pKueL}Jd<+f0&0&U#j&E<_D_&q_^qmWbbJxMT=)M#)BG`JKsJdWG(#O)8GVcE}Bn!$(u7Nhe0r``QX zvf`44JdwG)-aW(3>(O3;^(>bIMukOiPPP-`zO7eDJ!GuSI%O}&cO>OIiL#kJqic|F zbKE>0R<{_h$ng0g(A)E?={552dA-+92tZ%)gJp!7YQPsm*z|eLWvnc9>Wjdh2AF*V z?OCf44gQ9-)$%Rd3EUT_-*k#5weorWamd(@NQlrUmj}bi_{&_!COt0(l5unGncoOu zeIsGy9fd`=GPwwgY8Ibd%6b^j|A)l{T1C-8VXQXo{H{90Yl?_M3+S^gC6xmTT%)PC)Ys8Mm5XVVQh@ z5*bx3-z_)sSHB@wLhNU*O``RLXL9FkvNO(dVB0sUf9j2W5sJ<$D(8POm;cs8!JUKJ zdU+^?Wt)Kxhdxk{jWMJd*<_X&#q7W{}%^(LuOC5$?1ul^@Y$kk55(TFzniPF`>n zhVvSqO>r~Qb1{cf$_CW$qL+vS3o?rF3^Kroi}C34?q_=<8oNhtFwFm4xM6OKye%EU zmhcklFv_V>5Rr?uzj&Rqyvh^tnrTOyeUbLJP53lr!@Bu0>r&(Tw4`WXYq9Wc8oHl> z17@^%=k>3rZ0P}1$Xp}e&)!sje0|C%={lrkpVFj=q!AkXh+36iaF7)e!mUG@UYd9?2fMBC%o}uLh_8M1St+?R_2XDYwkp9HS==#^U9ikq>_t z-le$ngFf%qIdJDa;s)~}SlrIC3NmDVi*>Y6fo_3b%QvZIVN$I-*I+}qYnnQ)&Ekn0 z75wkSNcunPa^E+JTPJU!xt}-6{}!05YDeQYjjo==J?nD3EU%6V8)S2-(bm_OXP?fq zue1vDZogQi1kSs1HtqUiN7cfikVNKzq}m4Z5iauFm*cKVFE)tmC003$-@fvU&d5FJ zaCu{|&sbjI3l-p~z50hiG5;zl%k)(9RX2U9HZf+2qtVlP|CHOj$Mz~y%G(Em^+Qd5%D*$xX~lxxC6@l!cx=GR zzY@>70iqx3k4uo3jl=5)?yhT3{N5goAJg7{4>G4T5kR)ql%ndjV#sxUOX#1jYYwk6 zh9LYQ`FMz1c>+!z0yy{K6XBIB{qV^#%jOQzp!o z*~0o9E8TdI?z*$`AG+%u6YkV;B3=Or7MEf&7KV>~w3=r3K`)G2!&>}LR_O#a|LYS; z4=6U`huv|v zG0I0x<0?3_@kDW^6-e9~*esW6KfV zC24l?yPC#A!N%7T-j+8jMl_Co+YQCg@?%M_=%_Cpk+LydE z1huob7{x>h$LDb(trem?`(`ugt}%)Xf8U{B@lhc>ggn%w5Xk*)2`iYo#`N4|7A0Q zq>*~=OUYf&&p%399jlo_30kO*WsC9B&5{b0JO~Qyg2f91FGFC#eo!vNlU(}+D-<;Aq&|#}%B7{>tSd$C*q(nQBH=ge^ z8O&M~n=^CHD2xWnu8CY)OWm65o#dP`r8ZvhnY0}Y9i)HLa&UtL1zEIh>)A>lSF>7Ca%tnN{+gtK?ILLw@M^3CX|Yz$*2GxG3;nX@5>APQpYs!G)-nWtKO2}fx7RJ;ebL#o5aEd?M z;!8I0xi6eBiJhi4JIm7EQa{0?G*?n`m}|GOs!&q@oZ?CecIKZ2Zi`Vb3h@?avvMa# zvIwL30-L0jm00LUY~?&L?Lv}2ov2V`eRV84*txaKsp`1sHOSk$G$KhU^+BEb`#g@6 zud)uey1RkHPI|;}S0>rxV|RzYyDeYB{N?RJ3I!iYA~}y?%T^?Tl6iUW(J(@6T7pSN z{gKBJB7^g9!Uk?e^^gseG9`Q}Y6IgZ>`taP2-w1!RgZI?j;0SwZ}%Le&{E5|5;8v; zD(zHFXeDf43sdKl>9n;Y_8|XFyxi*oU|gjPyA3gqbOZ?K%b7#p)*$c6tE@gHPEHHOa%n2_MT8qi%(HBG0tj&UO0u zv$H?m|0;&UZt3(H#K$v_@Ael=Cj`H-PC4Dm&4;*qlG2oC!6+N;df^m)8pKMZhOdK& zc&_$1W)e%4q@+`+U`vg&VrRk7I;#)o;r%?%Ku_HS8_^27^Sr2vT|DvA1>O-Mb#>VI z8roTf!X(p!p|URhBcTxtLm?CCh93lHb!kd4Fgk~X?uHx#?fovTOi8{(C4w{^fdQ4d0y*J5V_@^|rdOKOrK{)}jj=>BStQa@ zn+6*uubE~xv4|j?rG1jb*`~WEImh)J9x76qPSHuVXlq}UhRnabcwkOu!!NF=AM0tt z*A#$NQC#J>(Ue~refL;;-$k2DY~^9QCP6yb66Af6wS%U5FrjknSsXV-q{m`CtH_?Z z8^5lEznb*R2L)SMbni(gUJg^Fok)tYX|JD1pOB{@SV&HcqlWpEYZ9NmocgrzV-mjt zJ2TX?^Zw6vE5p1kJ2w+!YU%bnt@hdX?LzS=I-LOtaI2Eh3J>uF^P7>!M(Z@c|b5NVnXtxY>rjT4b`C=f_q+Lp7T9~Sl zOh;=m`2z!^lX`iv*YEK?q5Lh#20_#QQH6ArkI*)R??KRI>8fvs>WsdT>8A8XPFKP# zS>$k62HDq{&VLa?d>NPg=+*1q2M_s(sAbJ|Eu zqb$zNk;X%ik4XD|!07i6@K+R=SDD(%CR$`KhZP54Bk|A-(`#;r{8F4Tosy=MyF-VPn8Vv$3*4sB1r_U0#{AX zI@zV~dHnd!%7@cgl9@+wpo-Xw;6**m%nq59y^FL{eo1B*#rp?r#{_-jJ`3%hTuwBi 
zCxubwtu>WNuS4575k`8-V)>?%B&?Q%DwkzhW!p9`>Uf61?yq6-%>j1NK%`noR$gFl z=l2vP!X9ta(ux>TQLX0&XdE9N3p4*`Vb><&zy8dBk-({Gbj~Hpgi?8t>Te?dIrJzW zOo^;U7_)pgF@I=B{4&BCNfznllHE70HIr*YnKLXe)-cNSDuDL$p&FIe4h3^3pJJN9 zp7+?Ej`ybs&*u1jljBZ! z_Hva}R3O1P_|4GDO9JCt`WnO22UVsMAj&0_thZ=uBr^@0n+A$^tminIeWl zYaAE>cN;NP9;DYM&zuiH(Muu;e5A{JMLZ~KV^K}x-)fdUHpUCKx)n#h@g#-E@?6F8 z)nps@Lt#}-Y;!{`_M5hz74^i);ct*meUzr8)qrRFHtFo4G^Ub4Nz(dd&d2u4&+M7T zzNBZ8^&%&w7++h!e0Qk4IFa$JdXUo(5l((A&BI`0)!Ewe%&&R41LeBDI||GKIA^H> z#Rtfw*J_XEvQcroQk{JBA8lD}z2}g#LJ{oNABUsXDMn(QM_;CL#fXO)wuL?EklBy7 zjgQ{`G#cHiGI4=z$cl0))pyIJ?HZ|jefv7?upyWk4Xt+$l+bCd4);C3JP;?-{_lnU z`oX8o+pydl^b2GnL+m6Of|O3sxS<{}9L)ll$w~{4*g{4+RZ^_}_p*aJs`G4W4&CxVYh($N0sQ+UX4BAWIKcLFF6GF`|@T%!v6hZ zOqRcxUL~bfVGz@6gSdH&$G2Q3qqF-?%b^KZi4ycPLFROvYNeDq+QB)KrJbse!gZ*oUmQ4eNMG7tZ0puG3 zgUv}+jhG#~N{CgfY(i>x-opBITRvtlDdPI`jS|ZPb;-vMWREcOew==x>}jciJVte% zsBT-C*Kehh_7NRAS64EHdawO&+aStmN=t-VeWTvv%KPnMp8SAA#~HFZJu=gbh$}Dt>4?)6>>9?{-XK04FG@DO=`i^_T&e2rnh`B&SC{DxpcsY zadl(hQLkAp*D9&Q^UQvXnsh<(z7;R{(c=owZ+%|`G7lC3Rb=f_O@ktu7W-+bTdRjg zu;`adRm<7Vl&+MWRtAgB`%51MZgGFP$m>bLn5XZm$|NUtd*!~=4uL(mnQVb!+3$G@ znEx8vU#)~W66qv8hMoHRy=qP0_fGcV)C7j*Ntxr_sy9l@t6T?6oHHO3(u5&KY+DCX zcg>q`Fp~`Rd{tQSQyx!zJEdQA@$B(qLAE4UPD+#4ZOu0=gc9hue2h0yK9<$Cs1mm| z(+Td4+a>gn_ajGDFMV(Z$KSL#KO22lg&GMzfcDW7D6C$Ie=K8jF5H$UG_XnfTg@L> zxDRYTfG!4mAZF_K`>7;!6U8c#oSb*2cR0Uh9P2sarvg4eE#WgwnJ8(dkJoGNF*KW{ z-Z2-kmf8+{GK6G}-k=O!mUsfDve{Q9@&>!SWbX`#Soit;{MIe?vFtLV$#Bd0E%lM7 z)1p&g;=cg!Ht1dl!QEoh=#bN@&s2l72fc^YU*@e?PncB)C=vDfnJ=~6Nv^rY=^pim zf_6gr>n*Y8Pj=0@`~*krTeO5$qqY0O0@YTa5`y;gP20flFV@KB%-aq!gRA(?xuYR< zqG0w{5w&}km*Q%=odarj{EI~kJ$Mm;_r`=%dO=TZ4}RG3m)C!o`z2Poa!`2-coKYW zG>6;4eri(y`H{*HIj8Ao9M9+ldqDNh!cVMj6QE7&xW`L(x3q4?X2{#OU*T&nzZ(ur zCeMbbDdj!FCnsu{s7X!cqhq}BY!#%W-u>}N>ZkYDZ{lrnUXf3Uv0;G>`o#TIQ%*Tszx{7V8VL@U zp~h?skm6yegVTqio-PzeZrIfJLekP}fJ@*Cd-K`2Y+3jpaJ0b_*C9$I6!SBKwN9Gq7GFShN9lZu7dqg zvCOm_fz1@viwqwFv*~OH)AVP~33Fh;pG#uf=D%)TFDP!Q6Bht5L;}Q=szx#1=84i; zZulDNYgV5yRUY(3<8Hd0nA5G&=4WT!4Qoy3Pyj5^sxJfP!`93 zPwER|b+5}SXX(+#Ch2zNq_Slyi@y0Xi{ziCjz@7wV@WH33=!34@{`QR$8j3( zpWr~&pr>J-t`L5Tyc!#+v;qQ&8{p{$h*p(==7tx4+)@|RWO_;xq!|F%62GuumvXJCCxV0 zh%ewQv2?o7HnnM71 zRlE|5%UHuWN^1aW;0mFBm@rCfMio+Vz98Zt^xzAQTU>3YCMW1Fyk>xN`_>ASt6AEC zF#pj@U>J7=#0Vw#=i2~(mgF5x z)!*$X{|^e@|IgF=*PlK00RNGphqkT)Xte&$dlTTB)1fTrEV!_(YU(@nM7^wE`s-&* z0q{D|F0=zUYK~(n60LgR+Eu9BQ&#YIhxY&Y-5_T;FcDjXKqg{=pr>p%!H(ni3*bRh z82LebY|^nvWqjO^MfDrlcHJRrD2U&yzW~Z-3lJtxuyhP(mY{s#OMw|H0IZ-sXz71n zJh`Iux$<*v=og@|S}+7pX^?#DRfX4tm8QvaMIS($YkWMZP0xN6NM^Vu04$XRWuYYh zGQIKdJOKaD&_5{_l?L4_)hTXL+ywnbr6K?|J`31_>kw?8GI6TyxE7*?5BaGe(g8_( zf(q1Gc7Xh|)_W;dwrcZv=_)u4EJLRdtC4H)p_?cyGkDL-*N__a2XrsC&X%6N28T!w zp!E35#JmK7w)UgTwy`|9TM+FJ`FWuU1UkVO4P+!y$KN?AMx;GhR9Y5@Mp_Ot(Lhi1mq07f@BaE!Ql3V#*no<10X30rM{Kjk zzxx#bp^X0@-WQ%wXxk>R8ieA;4ya)bi@t+6d@z5)cJ*#Gt_${B@}XG(_FEA6<=Dk> z@OVmp%x6D7%N0me(@=e5;KbU`S=mk4#X9|DiER$ux54dq7M>2&Gzi)$`>JWstU9YE zOO?ntIganZR_`gerpHD^1vXNcFLoLDrSE{mdl{svT!Y|u&42=FaT-`f_#ZAV9;| z6~!md+nxT&ZjRtEY4Gkta7gNM$TN)=n}nb!I?uS8xA^LZgg0^8RW)z^>lX8Gyw6NG zpa*s{^2q?=O|Jg2aWw+G5=&74A5%(BUigjgD@PaCXv313ZJO8{4=eWvG1Kc7jd7-;ju%qm{~xAu;|AHo;nr(h?wd9)8H zzI}lctr^fc`^BzLQGcmKAvAJV-y6Ul4lRI`_Dzs))BB_{+Gh~5`_v*`~4rnyX0|Z7h3Bb4U^UMGSAa^}TRlD3yAAi)@4g$v;tBznC7@W2mOZ>O5)5~+0HV8?8N=o&i^<=i; zE3)2chmAnci>D(2wq5gIAiEV?YxHRGE1txDZ1fX2wT%~t z-MwMC#&y3#xAPHbVN>0CL6ZrNk%nBxXZfMDhBoQR$!3@HLx6*o$}APttZ!4C&xZYU z-Xfy~#N=N9`LvV?U&N{hn%gE4Eq&fDYVmSmfdjzW%Dw=e>m@)+y>S8}t*Byf+takX zdVs=fKAJL`=T&YypQzYjhZvM`k-U^+{Z~NhIQd5Z7eM({U!%b}gc+o41Qigw5Vlmu z5Wu0I3bgUbj2O+G4S87jSWN0zZhv*jxoWsN{yBS(z4ZI}nhS3oX7G01&+;m-S3z(H 
zM$vq>l3wjfj!FiAD@Z7A(a!q-BC4_!(e^6AM&CfQMaTsB@t*z7FyF6tI6>Kz6{xf=4M8lKwvLZ2~Oy zp9X*)8dZ7E(DUcdxunSOK2kL8^Ivpbn_%=*@p9P@uy$kw>c*HMZQX!x0cz%7x=sIn z`Tu|W=+hfF=T5te{~MlU1=IFL~pX^0yFFt>b@h}J}3B<{W5HV$~)0RPMfc+$OG;i*(K+t?0jj$xUee%&mgOp=MQm!j6Uj#r-^7*pXX`2JeI{(L}Pg$A*yr?iWOn!U+##< z^aLkw#=KT;4qS?U@y*FH;vgtZY}bz4?-;;OB_ar~wj);$o)l&{D^LeL%C|Go;z1+6 zd!9~53Q~R>JN+^2I|o}7j~!y#dlEwXyZ4-&;DbpR>ck!FAeXa1Ki(^v`71BV zDK1Litr6(yls(^Q{fE0aj{mJ&gY=p6Lfmu?4ahbix3;Xf!fb`)?FpWrk>qXAY2C91 z>PU#&k5w0v@$+@wg; zmHUwV9rIi@gg)bNK>X2C=kG)DcrPS*X2pD&d`YAw5?RJ8`c4zB>fPOiP2mG>syhQ> zRO$k71<@j8XoSF@mTRkr7)T$7&xuq%x*9ZlThjUnY=qv-y8KtX^ie>G(QD=0=-ns%V(z=(eS)I2=ftbi6&$;58V?999!PuBmq#aR1#5Qe|vWaT1;>D9H-LY*g@9+20I-SQj}Qpqmh+tV4< zTMdBU92NX1@l&a6S<%lD8%4SMpKI13>K$}lJFq;&Kp4xdFzW!!WAVBj#@@vMnH<4ZoPRs_;LQ`e(Z1Uuf}vPZa(? zzv%CJi)sEWb3Mh3buAYwT%cI!o?65mNN(4b5o|q3`d~2-Q;k>y@GG=GTBn#7Qj!b& z$&QIV^He6ea??A)p{1}0;YX?Tgk{uDqnAYyM@$`ekH>J&a1fz^jiQ>l-Gq?urM?i_jD zh>_VPhDbzgz#ZN%M5ZjAUtTh2I7%|QKX@q?<)rUHU@avYMOIE1g-UW(7ucn8ctPy* z#&Qrp8l{}SLtYknv!gy@+vKk78Kef-g1hZ>EQPU`_np^|BtL zRZqff#}hmJo_X>y8P&(%^HtifZOBtndVg8KcI>MEQT@!IxgfN@pyKR~nc9W|P)PNgW zpmf#z|8ODuzfb0z(B0F z?#s$XaA5eH8i;J0K=0(o@Eu~pK2Jux)=XzS!qD_Yny4dsn69>jWayG)?`lj_~wm4>a-v!RxcQcfz zD4Saf;>`@c1IUs@PO3U|2G2Qy5Ph$t^^QMx5DbvH-F8Lh&y=H0eN7RxBMMQcUB(oP zRDdX_wp8O}L;jTJC^S}GZxmCI+;JkmPBhok;M{rQ2r&1#I43oDyNU48baL}JPAKy4 zDIWZxy@}VQt)+q1eY@Omk=Vkz^Q{8i;hvASu($jcuGB|+O!Xpo?j`W>N{L4N=$1+i z=!%IzB_N|yl3TUKyWSp;zf?K$bV%@mt~HzB?)Li=$AdX`DN}Kt1*;i8LMhRHm2zpB z*t|gh%mC9;e@Sjo0k{&bY+{jI1j zc6wfoj18WyJqCZazEsa#(2Ie3-P>NVj1|}Cp8dW&m^j;GShgLA85 zJl8rrYKDWq8%l{d`!?;iUAR_0m>tu~`vBp0XPkcSM~ExLU!MZnWaT~{Mjl_9O%R;& zTXCzK{KW`JfE)};cInvhFgh(5TVrfn<3*cDGX%lSO>3jPLDFbdl++6&(Q#Pf_giWv zixAlc@NLQpqxlkr`yk)J&J~L0cMJpbIEkqRdWu}JaSDuV3~E=Fet|cSq2jyqukH&i z#i-+j@a@V|`Yadyr0YC*XuN!ie5oS#*nvKi2EO30qp@wS(z3L76Qcp-TdKM5@1<-8 zuA1R23uO|S=Hc!Q43wb4R>em-=?r#2KJh1SIg7+LN&VK^YR>8LufIor{*B}W_$#kD zZuT^HnszA{Ggqw-Bi<4udhR|b;7u3kX5VKEqP;D15W>eW$eEY;ZT!@peP^ zqk~f@`EN9%mT@_k~!Ogtjhjf<;iIixI&8XmQ30}jMdV#vmQ`W<{ zBHY=#c*B8)ONZpIdhwx(LS&unsVa?#-qOg61LIiQ%)GDg@LqA=yTqamkV=E_O{cz4 z(5vKE^@;XUQIGK=ZLH~A?rsPVv?>Q*06H+Q1ed0kVQ|-<$$OHc3j^B3^g?rqjekf< z^5P`zFGS)kDhN{^eUkUy?nZHLlX_`g4O)uFC+A1eQX4vL#)!Mu_1V$~rN5H`Tg$$+ zFhafs?FEv!t7pdJR%ZWIyZHC+kAKp}M1fo<$!qOZmPSZIa3*sMlCg8iqgc!tG41j6 z2d;laUB@e(JoD=Addemthq5>OH19Wqc8(;^&kCi!s}5aJCe^97Igu)_vK}T zY9%(IH2zGLqlQt)=^`53us!s88I?PF!}vnBEOqN1sx{v)8@HNF?( z&Qo(f5M}dpwGPp8+u{fuL767fW^#$jm>FFzk{vj|^u;M;x+w;F>BSJ39SPTgHzq-j zD@`nS8~q^ILkp`bQD~`$%xG(*H{ZuIihv0bw^&JnVL$W2+t0KG;Zn*(Q3z)aay*}Y zwb(0)1$fyK-@XPC6eR@Gn=yWqrSW-$M^C-a;9#f!8|`{>)u730Qr^t9=(Y zoku3lgSU$T=hc!=BJG?k=%y0o=pB0%dmIwH_bdXEqpeskQpkcGtNbfLb8nHF<4=(4 zH+`lnhLY0|^>}>M_0bBf7{(2FH<46)j=n=@qp4Qq^v%O379`OY`?9+4A4Y@-4a5tZ=ohvYggPa_AA5uJikBA$ zltZqfL$0gybXHqM@Z`nu7(7^X(l5>nzd?3{g{6{1Sb!UK-lq#M4M9f*A&YA$rog(* zWr2^n%*by9RDLASb?W9Cwicnqna#UTKIor{W@-llR&}fy%aC5FPMch@s4o)?GA{6C zqRxjZu=d^=J(y}(CA}=26g0Z3dB>-vgp1xdK(NP!diYUC2_dFv)1&UO%Uc4p#(xwa zl-q_duO~!05i+%mMvccG^Zdv0V+|9elG(rPf!yeDBq@&o!LS(NB}hViov-$%_G)Va zS-JfaN)NI)@tYL`_zqb<$NgiS-fAS^r4hu*wzvvD`c%{vMMmho~)l*rxtM9OJZ@*XT2%e^CrT_PypHmCgsPJP@Gxmw$t zxE1kkMV!O0r$1fX#7vbANal)2`XuK)AiDPP{ik;)tqwCQ!uxIh%>1Xus@;3#5wWzP zys_kq{oWBHhkMC0MBo4^xoD*=kDVDtVIfZ`WI@uE)6^YX*;z6Vk{TB|NMoe~XK=dZ z;jOTXi&2*7mh5!w$xTg=Dt~n))$8CDo!kleIk64 zbn1!O274on-va^qfPVuC!%F7%7dnJdlfhjVAO?X4Vrm!+Qtia=W*E{VGM=TJi2 z45Fn9`sLuYK}M>MB0?UE2BqevE{je56?V8vncI0o5-&~mK_x_}y-sH(_06SWhL=^5g@q3| z1K|V@g1E``7QV^h+~r|Q9ULEL38xpo4MV?|;Cb7;7{<<5p|>ApYsHdm6Cx{;0rG;L z*Gv8*_b-)=I?=atsayPzt)C*$s!RxBNClzu{nFsx@SlN*AZJsweB)kRpfRzG@RI*j 
zIooKjX&hH3{~kH~Y&qG|j^M%3#$j4rW0Xi{5M0>E2Tx|aaqu((4c8|c41KSqjFZLl zq8TJ)FL01|r)p{E!1XJARpp#qgkR=*%Ti(!cH&9K-`$l*Ec zSv+1!cH7-Of$lLo#uM`(IAOt-u33l{vFv%>aR{?S7@}A65a28qWk$?#f;UbK78&7qKn8qM+)QD zln5w+{WZt++MbSLQBBCFg+q3D12qjKUelD^5tNVn)tVOipAZvf8cCHNIl#%fj65en z$X$0jY|N{uSc-J|D9X;)9e*M{)pU?U$w|epjpJ1PpI--$2*7plUg>npl;QLt=9Ba* zcq%)c=)^kxJ;ohy=@{Cw5*D(+!zgC!oW`TSwVkgr8lQ{7h-(+}S2=~ckD*k*_~n|N z+@cGW=8t}u6b>(O%>D-ZTnSo3;0MnC^>C3XA*cDD|V*Vz}FQz={3te(-fdo>5S21cp?k8%9>fQa?zYu6 z5JpAtdyx}{I;G`lSv;L7_?9o49Xkw)xvUk4Z7-lHFqaKWqJSwFqcO?6?%bwFcNVF7 z3qf`$hMAO-h+#N|twIemrv9i*-i>x^3?Vo>147$wn$3<~Yru6qKRIXwaf!(t+C}o- z)($Sf)cFTBPrV}!$K2Ws{#DQ^(rH@Sy1(U4J0he}cGv|R8CkAlVBGPf5TMkj+^~n51B{xAYb@>i<28SudTFJ#o`+8P{Y4~>X@@V zaIW2zj2KpNkXvr?&pMd<@C$_1-OI`vd;i;{MIX+d-Jx0nyK@DH=v%o5uf}f=f7fa$ z?89jX0=h2Fn(vYn^HBffs~>pVVkjbWlKmj*9Z*dlKu~%Ls8!6Z_057JfsTX`=ozT4 zOssSk?FXNJO%QkjU6RTqngrPINPtDT+oSsE0XkMqjF*5_3w%nHSuBaMd^YJOXphU5Hz1Qze;R7vK<~LFb zL6QIYYUBasRbz;HZv!N_8iK_M?Jvxszu$e0ABW-q%v%n%|FDozzy_70vGnF}_Lz)wl}Ff&Q{H*7v^<~K3lWqycD{b{ z$^&#mTTb-Gtb!sW-}~}=7EiW5ck!VLBL(NCTQ^=+3`kv|RG?PBWD03y4}nS%B)zz9 zD}nj7)IpX1{UuCmbh73DBJHi)q72{lUmK7Xg+WSEkZwT)0qO1rDFrEskp__x7`j`! z8B%E^9J))oYmgec_dR}pYwf-E{%alk4R8$4^W67UpYu9R_{GI<4FtI~OoLxK1FR2Y zOSF&8G007L2(TR1|Zt)65o-0$Dnb(^RsN7zAJO&Y#QafPWgZDfWxeCdC= zjPn!lNUP2D4Yj~sb_Nir3G}sK5?PBrQavlIC0GHs5am_eRe%W*3=3bwd>Hb z)x-MJEWly%?!|}yZP~}6isnP4d9@l_lc)_}r?A9ObLbG%9F6Ny%$0&uQ`ehsT`_7E zar6glRi6PA1xcR?aNctySE>ixhvdBTK0&w(m|(lVhxt2K_!H~qhnTU3Vg=ulB5-_c zPl%ox?sU?IJN;wY2;6P|*T1g?9cHlR-q-}AmCfr97{C&(A9^N1^iy=E_Wx45geuGi zFsuBBC;^XjU+~#?%z&xu78=?5`)6=W%V@R_T_*n6(JWtlwl~py&3o{rgR`2v{x!Ie z%q{~08=h!t8^83g(EnebKS4CwCFnc+=zm=@{LglHLNNM0s9QseD*u0?T_4bB*9T=? zvH#__`~S|Q)CkcTljRZcwjKXR;t-jj?WfWFX~O#P!VSSGc#x~7$v|~I%Tn2EQU(w& z;QIH~9fZOeKnpGaBtF-^8_HAR3U2#Z;9*3Cwq6!EhYzhteL2pZ{b!|$()#n!0(=yv z1d!u~Q-Do`GZEpVtyHZ`!A$bNeL)ego_sZMwUiAXDz*fh>8(2oo(uA<6CL97rXDLhw)ycU#fBfTTEwGvL z=sgHREr5sx1yG8iW(pvIhS7dRu0u`W$<~7rANNJ$?C0rP0A31tPpqt=4W^VtuAGQh zS?ydy(BcBr2Gv=o)C?e1vi|ZDg$*%dUILu#Gn!X9eg_VjBXCLVU}h)U{pY_5hHy0? 
zgn8~1SN6-w+c-$fqisZp$uAfxr?G@rn*iPY3l0DHG#K_=&D{_Cn4-A)Vf#aeh zfS^?K+%rO%hd5dp4`3xEWPHcBz8XQtG;D!^bhm2SU(^0u3zc-ZK3)D_(jm`z8Ub{J zh$Desa^2>*@Y=Qx#9Jfn`$Rp;tdttYy$++Iq8nE zH)Ekow0$OHC|jD=Q8gPXq1e&`kj6b1N&Iz`6Ccli+oqj>r1QVNJ72p-5YBVgL-8s^ z;4o5;0P%j~!2^@YlpWmV1AAc0I^!{8~i ze|7r4Tk4U|0(g6o9(7HvYSiie%_6{h@32Aa$5!#h@Wn~<&Fm}ifZfrYxRL860&4W~ zY8h=tYnK(uhs36eOSSKDXPOxLIFD~?8;RoAu~Mksul;-%@7V&nPCiJ!d1G@+K)#PI z`LLRA`C#-d)Vc)Fgg3|Iz)&wG(7a($m!1ly(3ggT?L!;(lE$u0O;NyH`?p2x?z4$W z--vWQjf!WgiGPg<%vUJGr%r|b`$O~7!SJc?{h+Y2&S3tdY?hki<5ryLo|Ur&9vl7f zR<@D-SlvFF8#}8HdLe?wh+IjbuWL>MTta*A*UuYK6 z#kZ>I$0<^)Sr<$chCrJ!ZYi?)@TSTe_yJYMK>&BOyFE;`I+%$9zF2uZhPxjI(r}MQ0)WZusLCZ>3iY9L{{)6L z>oqo@owff4ODP8qBAqrt@{8}Ph@kS?pWdot6H|FYXQp@#zTY3dzR@t4n?Re{YTf^N z!VWHNVHeH4v_yxw29T6sq%TG|LRK37I4Md^(xJBU$(BO9Atyfg9?#myh4bZnR}$;Y zEr{51v*YS8=k1TuAnuSJsKWK1ggMa29$fatNKK!KCtcbG_KK!U?cLXC+-SWSX{Uc^ z+{>!E%Rm;SX5(Qy6G0&f(hj^9bs64GD;vaz*s++VD>Q-^o(NgtJf<0;j(35Rs69BEA03&w{`68coZo{rTYo2B_Zrld)X4!N-PM zcTV_FjNJ!gmSH~)~byEF-_qc;3D3Xsg0o?q|3kz^ip?*cg)Uq_HVZ?4{A zt%v|JR+`?qvtThWP9nf%4qK0HxjW$+ZIum6`vzTR9L#^ZH0fq<@(KKV3b4;*;6)ML z_KDLS|1nlmDqdJZC8fq`}k zbP$CI?SSMT&lnkMA7Rc`*Z?To_0z3NVf7{f-lNM83t- zq&~6f0X`3^WY?}}<33hKYaOev-Udk@mmLlC9}xBf(q5|C(#VQi*x3j(wnx9B+gg$#b`Fb)28Arw^R?rMn>uuH&OONcu(4`!p$^=XMp`xlTBm z1H}2Ciw!A}q@M2^O-Kk2+A_9x`I~KSCqZ~L4jqo+{$UW#3i~-Wy8`d^4>`#(ta)@X%b3{QB-VOWti=-#7R5b# zXP@bs03)di)X&JPp5f%qT}e-mo!}1);zkrouPwA{lvbLI&E4<&s2Kf}f8quHH7%F% zoooEtijakvdpdG6U|^9mxB)!?fg!yEbg+m#pKdO%&}0JuSy1V&sdHA#vv*s+TNb8x zG`QQXf)o7t?=i678ByjH5m3}lLZ>pI2wA^;yVH>4vFzaXJMN+d--{r0JsjHst&cl2 z_`3*-!&Ly%Qm%*RLgS6RTv?k5$ z8pKEGkjnTT?$+-%2?=fyLCggLy2fbTV43>xWQ+Y;J|F>uBlcgKmt9X(f z(EIN9B3MTgyVpe+>-@JlKRancHoi|bo(M&*ast^(?P@$AVM8s}8^6B1xgT(`MAE!L>3OJfQex-X zLl|~~MguE9L!u+R-w0f-8N~6Q-GG?a*WHMwUC)$2`CM+{KLETz`JI7nhWYLyg-iao zOR=sq3F81#n{|Hf**n-Q5o43sJv+dcQ|tH@jNS9MU0EGyos6LUIi0;6xtGU4sWsxN zH5lg5u*M6=%6i{?RgR4}cLr1uvHJqsuRs^>{q|eEGh7WcK`XJG2K0wJAcuAY;*BCM z{plaIBD#HOV0H(T{SIKT_$eS-zL&R^PTA#CjM~l2(YQBVE$0MyLDN_0*_YR6}S>ne;z(~E%dW^WzzK6T~ zb~~05F@@;>NmGi}n|h!RsU|E2&D(#%sfw3&-WSuA4zcuaYQ2st(*>IM`VeCv*X1XR z=9_ad?a(HoU5f-wVs`gcl9qmVdMG-Q$B``Gud(|v`5t_b{0xH2jr;+h(CsWfVi?vQ z$`>@&p0$OiOI*rQnWq5V$_0h#{be@KV}JlxPJ=RJ7O?3CF_#JJ3TKU*r!btBKlm#B z;A0>9jrg>Nomfu{IKvKo;;d1DMoXg$@HHk4j8$RSkPid-Ld4N5=mYel)naE>F2iKy zE6tYttzzltqj?p4Jx1qbr^n#qEAon&=MF=|o}|s^wDFbuJ{9O>?K2vDreyov)_`!4 z(c`{W!9Qma0v@ifjcWND7I7ypk+?Q9pz0T4AYB4|-P#>={k2B6o5;E~(W!pu>cXBq z>dGmvUSKut z5iBD>Y>n%(tJ=T0@V=sLr)N)oXlN)v|eHfTR(GgmdLDJ)V8&|PubD`Hro zjf6)=E(Xz{(uPt_QsVZ5Z>IRMA1G1tKXkSGm?+e?dv!8zM7cfzI>vldm8-@iLfn5{ zRgjtRDhw_PbvkUY?s}K>RiaTs{4W9%HgRHq7yg^egAXSPH=IU5eK4cLvpdNijkT3M zsk4Ms1gQ1q7o%(P*i}^vvs+kpsVOg9R z1^uYRq>l4hU!b)?u=Vq0`U-9z#5PcFL0r!P zZU16yH087-XLJrc&i46)@G`uO4Zqr1{(ld7Gcy0M?8EWX$Kx?iHmgBOEaGL zDUZ7%I5tn(>B$|H8VCW8vhk0T?U1=`#hOWp7pU#u4^4pDwZn))Y@#nI?dTdPK^9qL zN$L_A%)_^JfSwB%FYzlcnJJM2-{N3fCQ)Gbxi0aqYtVlEwIcaTsgNkCJu1zL3`Mq8 zo6~65)#MIOv5d*~s72oQ*WJJTHGP4KZ=Q+IW4jAoVooW=nrMp(pSRwUGpx@&3UKk< zE%aIt(;h}k+OU1C!d(nqBRe8%99B;YAU?_bbc`?zTf+o0qA_MJ>}8i0PLVY=)brGY zK)f)I=pS)q=g22|o~5xJkLg_Y(6j1Q6n61~&5csnGU`E~t4k_;MlBwqLe;vk*-oY> zIkJ$iX2(MmBK&{~&}^V2MrGNnP0wI6yl(yuhD(&kWFBX)nm>`NJ@|Ccy(t8XNn|c9 zTQ&GYMVIko`QA2@tE;JsjBT$*d^()EFyGdwNH%ZB9f4C8S=V0LeEORmK__(+3AZ-2 zzm)tqK=v0F)ZxqK@*a$kz#kX>;1SK`FO=215yi0>>!q!JY~g6fQsjGxT{{6Y+_9wfc(gBJ*K_quYz->w9>Ds#MHc#2=R zMF=lsAt$3X2hOr`3Twny=08@R7z(fzeY=#R0abHDMYX2Zvg5RCc|IF7y^=Dcl-#|z zxn}$*uQ8(5Bzu*ILTbM9hxiU74cuv)0`!`JxvXIEKXX~KrA@8Gzd7sb$W7dhi`MKo ziZsde?>&qshQ`Uw%(_3z*4}x>whXcJ(=i6O zq=)*XpDt>g#`D1_Le@m28Mmjev%teL-RghrdT&;K8v0}O=eDlaQcrd0<;)R8OX_&n 
zBvm1Gy0|tXulo8??K3*N&dBiC&bqk-0emjxvZG;Syxr8Ee8Pj~qVtbhruyBdDTNr@ zV$IZls#_XEFXg@546O@^XuS%r6M3cxmNR?j=MuhiU^Oy?xu$;PR*XoDvr7i|neI_j#)qwJ4gq8u?v%dai(OhL0+wP4Ii0xlLA3BR^KhwJzZh zJ{`HCm4AfDssOmsKG-nt2CqoTe8|+Vwr)Mu2n;?A@KTp7s1UKlw@NkCWS_H4q-PM9 z^(YBHq*^=~9D6v>j$r89+Q10LyzYS~$i|t|?`HVeI;2$Ms&RXUW)*!u zdFym{P^wXgJ*Tzb>t~E9Or}m{FvmY(Q54+FZqQd1uFbTHayPq$rr8tfFkz!ZRV;DA z*TH;W8e^T_N3?wJr3#?tx`j7Iprn5uO%=^4_U)#yV7M9N^~uyh;@1e7TlQ8--$nQLC)DjBtVqu8VFYo; zLa5yP;W&}Usn>D`lp$0GpBwk5wZ3)YFUh%?V?WP4hbJDCt#pXt#P33!x&ug{-y$(R zqDa6d6QRAl`TbXtG?+x;_XS-|Yc^kVu9(woC-}&IOtykCGKbum(#P4zR@6UgRY7L( zS1OCqY`p7bf<~Q6km=fVV9i*_O-Vf9!VLZ@Vji;JFe+XOy-4y?sy&Bu9SACu#Lz(A zywhH8E*s!9GlmDb0Re!kei=RGpRM&O(Hb8a!2+SX-k8S=J*Q~q^>~$aS3GSfKmmpU zkN_dwPUVy(0w;j=*k{Cdu+`CW7ZIw^qzt4lS zaA7774DKnWj!Yl(_Q55VVVZ@=;Fb$6{7+-1g*V9ZU5edCXBmpu^&NW#Qz^`sl14+W zs7oNPfe>j?Zb+~mjk6pf;euM{xv0vk6Z|uGzaq?)8aiN z`}Jk)-3WFb&N)n~!Os%i8dHYDwk+V4D@;$d9+4e7t zWl>$*JRBPtW{>LDk{6A2j@So2dqi5}_|{9bB%dBv9@Zx;=EmH1RX)*>3I8B5`ujA6 zi=8;OXy-tA?fWhVo;O;x!T;x%kJq<8X`zuba&!7;6pv6a%4stX z^V!_#d-^)_exvuB11*u~k2f*@-D;D-h*8H;*=W48bF||9M1YHK+(+6AE7JHM)FS83 z23L~7m%Wk&y?XkVj*CR+Azp#7M~{}_e(S+3(%#FoFG=TJ!U!!0(fg|g`?685jx5za zw}|%0D?quU0-D^E{<`Wt_WVTfbcxz-OIS&-<*u8-qki53lytXX@_BACl-F>EKQUauei2Bt0CoQ0$5+kB@nYQc=VKGbUpO^G)3!hH91#^2(j**+BEeW5-%v6 zhZH?}+1da+Duy0_W*zr-bS4pP)6U3feF6Hui%1LKh?ZziU_0PLh2+NA(?~oY|6M16 z9Fh#9E$w81*(;o|(Am}gSkwxdJ}eB4&NkSr@2y^J8UqFVe0;-+aN5Bgt7MvCt6SS1 zEjasbiN%WPOG?X+cQ69}E{JPBj-Esy5SgKhLn(C4M<@&1b#60%sArOvy+~B z_p_9ma(VbI?YOgzi{xIESHCv>7SC1K2Lm;vo65ocRVw^O5i4huNvc|4)jkKpP7LhQ z93DDwjDC&1JIPO#Re)WGxgtJ3){5c4D=uWoS^xBo8j(*VXM410(S|NV1TuFl+1Zx| zF#Oc!=8=R?-=1cTp-vc&2mdjcWG&ztHafQ7@5mTU2I{k4z)h)L|_DM1bmG zJZAD~h4WU^+@YqwOiB;+YT;EUwvzY;**se|KiD%d4j;;8MDmgRJ%(8#`;0 zMGH|}(+J2W`v@M;*l0&IeNZ^pHEJgo`SWS?v}}_c^sRw?M?A3=o->;=~=%jY_4kB&aEOUS_TeK0lvTF%Z zZB$&CzFE=u{CiL7r;PTo@+RadtMLYn=WPAij$HCJT{ko0m$uQwW#FQ`wC*Gsx9k`E zOF~)Ws@8eI=8D>!ko`$2op{r`h>C@Y2M&yHO%LpmDPX1Q;hMdgcmsh;BPVe`+*sSo zx8B;JO>40=5+BQNeqk$Nh;?(LT(z|gZHYS|nGs2MP}OdXH%=zk9th`5YI*mXZqusW zj+;GTlQ(gst;T@SUBX^FCm$1R-lWeGap3HT*{{GGqga-fn+jn*@I9{ZjSQefJK~x? 
z3feWe2xodK#I$c|Vcu)5Z`^E8$u{BD8%gY(ai6SPC>zpYlRals6D7 zw^7ne^=bT@>F9~6p*50K(ronA58I*3VB;57Rti%@kI`50j5Bw8CBP> zXt4DV&%x$}h*jFA3C!2~EnhZ#Bb8v&HXOT;u8*p zT6A+8dw5N378$_MMP(r+k47`$()j%yF1S|L0Fz+3`jw;hZf^7McVFrPw%!ekl}2-7 z2&kx$EWZs_l#kEZtdr=lq3s|PF&_|HAMhl(e449ssMph7^{N75!<2oZmoSj1#IqUx zyNwrju?qI<{1u(qYcsrSr>4$FsD=&kB#si}8{1_tkd-ojW3{$e4a3d20TI*T~>*vbKkcYNm7vO=H0nETNlQ}hw;NLqBzX8N?;G4L@?Qu;@(ky6p$Ut z&E%b5HJ-+N>cC6aXO|Hrh{N2R3?p*6A<+MQOMQ5;(7vd$#s=&`7Pi@`uF%BM*vie~ zN0>g)!5sREP4<8q`kNbN9;Zk}8QEYel+}jVAr+zXl8+=r7^}P>L+mBMM1Xg7@sMC@1O6t zSab~W578yghHjvDXO9G&n4|@D4{w4rpDQnL6)85!d>}(4E`+mal5Ng}2;~A9O=Q^K zT`eI$KfgO-t$vh@N9C2AlZMO{rlu+VMSYSc4+n4@4M&d<+M;`7T8|Af-;OO}KlsR^yH}v zLw18$i4Avy_kDGY#=pfVW$`VLu(lLmD3WG5Ce{G1XJTl<`6kZIe~Z{th~|MiCp@F` zJ55ofB4hODzFmd$q%|Kfp=h7r(ivh1_bYKPiMmo7XN?h^%Emsc?Ls^xJ8xB5p}2VW zH5{8B2W5tCD;3lwvYn>ZRe{yGhmZ&9d5;c47GSpx&}vT{8^5>0L*|j;>aKZA5|l%A ziF{jsN2^OqF;2iXztWBxh6?X)!TKWJctfI0@6ojLVBwe8oK)5O0xaHYX;{k>+eHPY z`pDDb)gV4M<=63_%RfX0zuJCgY8HE0Jb?ikMs6^xEN{HCoJ~6+RoT~^5m-U^-&Z~J zFE&4>4AvDe2yz;X=WY>GXdM`8IgoE!=?KqM4xTyA-)|1EB)%Fy$DZQ5EQ@i`{edmQ zCT7PXME&4gkiBsS^W=|HP1h$87V(B$_vALiJUKlp9Zi>S%v(`hje)|H$+U0%of=Q~ zsOfO(oP^6?bFgX={@9@X&arKLWoz|~|C{F|17w3U=cCZykG7U|Yr|Cc_6Q>hB#|)p zOUq?ro$$(?p#gfsA&u}z&c;XkNpYz!r`B$@O-C@&o|A79fw3lySu?Irf&ZJcu>cxQaqRX=2Sz-Rmvio5@ZF;0|tF#t>9`t+gUb=8oLd_ML8)%Y%uW^u!n zDDK`#gtR>4Ny@~VB74?RWicF_l?kWWd%{+!l|8`p1{qibO zElcj046dMnhe};)rPg8Jd@v^dybtnQ?RGC9nHW@y!y~JYZ+c`qC9Q1}WVDKfWfz3` zm~lrLxtABti%k;7WYaHMH%ew05~;}$C`6CA_l}&1ZSqsZVMrO)fi;)nr{eT%Dz~&j zNfmNC4GDz{T@5#lUCx)W6@iBNg-Z|ZxDihx=lrDd$9OE#hK$ldhoLHtO zPvxc@Yjys`6Sj^IuLq<(TX1s3V08&2YUfiNxcdqG{^@9a6hvrJpWaw3>qXFalImWK z^3Zo!MR1qO$SRdTJUMLZzjGbkw8zh+}p8NS(xJEL`e_2^SdBYg2V3* z;f1bXoR_m2y4Tam;92nRNO!uZjtl!Hkw7o(dviv`r^HHO)5{WdrRDIGqV9}UW+ai? z3S!uT>nDK`oGO8;UGF2ycHO|e-NMkfsfHuCB{8mqv>(i7bG>ylSU}hxGosrP+|1fL zJOC@7eefH#?Sfc-z(q_nKjDNA(KZ$LEQS{HY9^%fwVTQhRp3^55dF3tsn9CXJ9OjF z|M?~K-B%b%GXz3n?j9QN#6H1z57G^jQuw{K!+NZ09WSGcBRj$>K3Q$lIu+*aJVXEVk1|_u- z#%5lA{BJ2&KvP{uJ5Plyy$!{`n#kcb+f?YP03Hw`Cu~3i^)`{R3i~QxRHw9{COQoE zK?CI&tXSL+(aQCXJO9zRWT|&fPo?3Pea3!>m>qj5ng*^0V}aySJ*P``fv~Ei9i5HK z+TE{4Z9E}fKrsNU!8g?eTcZ`K;@_ToN^`CK)FJY>R5+Y(<7&q?r}*?@dRks5&w26z zjC#)>CcuSoFi)jKPIV^NiXXooTDJf(oVNJ-*ZDU|ICiGNE^aFq zCg*5RouHB9DYM2uV>81}eK~ErfU|Vf82a!Kju;m!Z)6LaQRk=KchXr<87`o?x@|sW z_>8Jp)*$$JuMky!0*FKo0RjlAxX^pBj|=hq<*lKQty@z$uM&5C{Ar z@0h$NNr+S#O7iiVTc6{Qu|`p9ZYCX;vr0Um^jM_bKYIH-m>|YX%*89ZZ0h21utl1c z5@+4aqda7H{FH=WvQ-cjTXyi$MkvuztD`kRB(ZEN*CS(M32)e;mL1DIak0){gQ77F zs>gpcu!pm=%B`l>mQo`;g6~nVz6rp7#*}W`Q)a80M`QdFXWl6#!Zh3BE^(I_#7Xun z80JC8G64c(dVDfl3L)xtmJQ0|SF43k%CE8ZUM^xpTc~SCzck_lgtyzy=_AQ7$oq#Py?i zLuRd%dkE{RxU5Zx)b;Rt#*<)aM8O{jdIjH_B{|sU;`#^%AI@D=dF}cPI`W=YW!SEp z8`u(PRA1N%(BPq6g>5*1tFSC*yJ^WY8_~qG7xiqsiF(q};c9*#R%IvmrQHF5p>EsI z(M`Xp99K8$861s47_pI@S-4(sCeWVV( zqKXl{KH+Acos3jzr;f(xkH4MLQA(3_5-Z)GW;4y-$mAiPQtr>vH}h{>Jw-zZ?Gt1* z8UdV-0*zB8Mt^Y!{_l*cD8DTV03t;@FjpvRnSb*?;hx1tk3s42rf9X7=x{-u=C)4u z0-Mb<^Zcmvqf6y%+K6hUd?L9q5jh>T((|`Iz$}wlDHwn5zHdTWb&q{!fg%h0N)Nxn z1goHzo&ekbNbJd$_A^vqhr*LUYR<=iIpD{Cs;z*ztB4}MIyTD#qJQ4ltD`#6sXU}^ zcJof5f)2OwD4nCws&NVFPMhU!(_T)lV1UbCl7Nb5b+2^hqu^df$M(Vf?%F3dIQzke zL>dVeCW+Vq(LNApR(UNPCu@LUZe$Kd!1hh+sAJ+0QRE?^ak>Ay;$GbaSmOjX4o(_R zQ05m2N5cJ_W_c$!LLxQ4vQ1iJfzJ}sVV35tBvD=IdzjxeY!DYTqNLPcPO|Ugb7V|V z^T}8LBw?!8xAe`VU3nWZ))BiQRQ-#D(jT@!F^!q^8eH7z6@F6Fxm5ypYDbmgwdSpS znQYXL0LZ*zFMdBHj2(*2U*!bxoF65?506vDAU+gpy}RdVy~waw^JCLW&@1^u@QO8# zZSF-*lXV4gBwywtwupl7S|WPH>`KZzp#0!*cv5-}TkF=atq^>Qe2^08*v+7xwG+X- z!{cX9W&=G+V#6c?h9qRAn8MfWJgu`0^Pf;CwFcb5&VYE01_^7LmC>I$+H~(4gbl2~ 
z?Xzbx>^Y77{?94@{$zC-L?T>WaeCOoU8OJ7-Vy;?ZYp}8$>Xz`wEp8!+_gNVwK&3e zklCNTol-io0rK?#_B}OH^u7XoHr*gizR$d*%h~d+_~vQv%u{kg#acTh5uB}9mN-X& z5R`C3a1>{p6hzbO=W3$yK#1yfX3<y(``7Ux1H1mnBWs=UjLawg2r@q6)wY>u2vE-(~E?WJxzp zXz;rrzv}lA=*=DnjhHD$ULHx-XJ^4(+?ca|n0L4buBg3-(OPAJ!_6Xo8wV3)qhg=QuIXl ze%|9*SayrXmb2Jv7+we1S7BQGC<^}u;_Y`R^~AwMv`~m<8Q)Zc@JUH!z4lgs>&Hnr zP}lE+eaBPX*ESRANPbV`_4}b*1&dL{dYLWgw+&kiYpHeA#Ow=b;7|E`iE~6-J=}>3 z=#N*@%{a!7eK7(B`ZDLI2;dtr7%RAz?udnmDj!~ih1e8F$;T@id&#rHt?3n0@`B18 z_i|zISt4E4MdB|i8Wk4pr+A>=DyS{gi#$ISVVhCE=g`HrKSQ&+f#3Eu2{Y=)()Q^U zN+HlK&L3ShYBBTA()kw?f7G>$#x(!eaSn-2@)OZ!uk?Q2TpO%HD>c!5pxiO5$)9C! zr}nw8Lt^U{&SCb<`)_%KKi$ROA*q#%4HBPechVuMv4?{R`^oLZ7=Mxq6#-IAEJBn| z=o~KNMMPbkMzH@-B~aB*Lxm%*n%|J~p3#&VqXyyE$mHzg7nn%gQm#eyZc4*_ua2+! zb`CO=e;8q|?9olbh=n&~@-6c$kHrTxb(zL;sM8RQ7SuWXs4A5w2%E1V*KRcJYX2ON zIZ`fd1r>j>tK!+uhQ4DQs0uOfqHO>t*7&J{_r#`MIZt)e_g?zLIrrUEQ-X&%Ow0Ga z*1QY(PD#5Rs&s&3eDt0uBsyUz5r)>Q9uk&x7Cq(XX+|`>`HdtL9_g;gm6ehSsa4wh zhp~6KC`E8)cP7X#IhtqPXLoUV6YUbkFagOG^iuPhSbqDYUoid8YbiP;u2**mP3C_^ zX!Q;PC1%xNWXnPB!x5mX;$45T{m5|LM^$}Fn1?*}6FA04yp8RPy3f5Sgd0fathw2d zRjTVw!{~~eOB7VxHr7R3*-uCz(H;4lkmr`_`*#=3V60r(g9jK!ZsJpA*uHTuK2}D8 z6*>X0*)@$@+_xw5EUxpT6=yl>8j+L=Js3PRl`T?Jv?i$@$^EQ?3M)Z?4dUi@7z+5( zIxvN)&=E2#mjcc&#ZORuuY&4Zzd(YPvA`@=z6RFhJ48|sQJ+nUAq8zy0m$% z`F>ANJ${g2_jEaYcav5pq0$r~ogJ_GnH3|>H86IcQ~&1%?eD~RRmRwPwezQ|w!oFf z)7-I0UNXMSLE+!oReexefa12~nbdCC%08rp9fn8?>oOw;{&m>owJzs!|57X3IDt<8 z@q%)W6rExrbs8nrbn%GV_#~)}>&KtWtW0ToDN_8Og9;rNf{yDi6w9iJ53>_f8>01tc255Fic+W5vR;ZqL6&E=1=l+9ysK%vd|Z@A5^F?m z1MYLt1CtQFfYH^LL+a0o}Wf3{-xS>uN3_@N6$)d_YW!K!g>NNT5lw%NP~(4 zn!ZAVg)4r9;c2b&7oKF9e9P-dRKGfUHuaF*snt$btMKB!2SU)`Ca6??sr%JRbVNd) zHvjUIlq^XlFlqej>(Q!FQq)=%aOVC3vIOP*S*7E+skX~;?@20-8{H!R_y8o};rwRp zD+&IrvqFVQfam{eke9$?I^3`RzYiu_3OJ=7vxM1feh6O8`d$0UH7}i~M2o4%3>{(x zY#LbV=0>5T_C)aTZ-X6uxi7@3?!OMeXUcBA#0(uz$_<%#{dk?`%QKgD{kd*;*0);? zpaS=c8)TFv3lai^h@h?)oaB*ls_K-!9fRKkwJ3pPYt>^8m~~v74FZ#IF(59R$}suq z5}^?lZqS67jga<=j4aA#dV)?E`&Gl;O_A+V!&ZMSm5Q&d<`!bZl;+FS4|%WxO+?im z4s1Ay8^#D#wfL-&o=3+7DXa`G#GLAu>manI7;FAos4)>~adNzCnF>&i2@@3I`})hL zsb%fwbw#MZ1Cbb3`&R>`WSxsFVzc~tYxAGVI4O<$H>#wRV;H^l8U@-S#;OGKgz*94!hFFkr4WajfZwZy4ej zB|(-Jfu*u_r_8iT3`rKBDfy;!oq2NEqg)F$-GkS%kdl=5O(o*Py9>T`F|FLHdB(jV zP-I^7AC@g8#^x%a7D;2Wj~~AUepTRm|Er>33a3urW{sG_Xx>!&^Y15Ghlc^$I`zn! z8EkfS8#<(mLdSaPnuI|B!wDCqiWZItAP=1RLj0;K`1PZuAwtlHVHwnv#@X<=Q0^#A zm$3kssUp8P3Q}j27~Rj4F08uI&*EigTRB{uN#v6t8tur8bl%kywsTz&Lc3X(-=eK! 
zYClt7fmA`3ze!cF0N)+aa03V(AE>VM$cFnv;ne z-k6)10Um2JvH#HG^vqV)t;PE`Z$pKiW|bm>_yo(#g#P8iY64+3|B0j1VMLJN4Jg-% zL>{qB+7v-J^eqgM?tc@KBJj=U;o=|c<-1%#ZS@6jrU-Tc#BHa)${?x1RB=-_v8nOP zf_>YKwdBHoPcxO-Ev3$HD6Ln){6K5=@Bu|B)_`=Q>d}C~sLTLVW9xV6S{$EMDUfhi z*^#$R-W3Y$Pa&$QkuqX+A6}wX;*bSTNzkUdbeqWJ{KfElZ-w&+(j}~^&%9_E86c*= z&z?CLJWy)Dh>etQVhJm1jmzQgb)KrwquSEd$bD?A;a>h?Dt~S3nI%S}Yjh#zkW}px zQZx0j)D!C&nKOc>dKw#Y;)_fJm(tK*&+S8+GMB7`u*7G@a;#;Ox7>a=pe(3U8^l4g zThKL7jEQchQqKLc4V$06Q{ri1E#Btm4~r}={$eI9B|khBhKFT^3bueqiXM&)C*NjS zRC&fjtx`cGa_mpiO`(GKC}2KG)kTuI!~}5yp5At#Od81Fp z7U|q_;v0@;us5@HBDPr#98mkP$!Ld9JVD3##K(3N@V}IBji~pD7yUZ+GL%5XdQ37J2Zz11 zsm#wIC%0s^tbArCJ8WySCebAqz73q+jul%-W^JT#j27CshOZ>Z5;tB>xioy+orc-3 zP}?AQ`PjFzG`fYgBQGtbW}Qj1wjvm98KqugJo&A(m@l!Jk9meDPufd#%K+XCiEmjlKP7HjF%k2IUAFAblk@QmM#2G9j-W!7PA62Q| zAp?bj(}a5beEkMmYf@l0f;+>89!7=4)Nkkl^Vd@aKFx?tbD zz-KGDqJsl7-O^K3o?qOh1Z~Rfd5bu=XdP3mH9wV^Nm`7sq3t7+%XqZS^SyGhY2V9iJlremy=+n^imz%=BJIYd| zl@Ifv--^euoWcH>5B)fWsF)MzVaFrlyPS*srNusoc!oDmyi~u zzyAdHQ-&Oxwzm!lZ370ihA$-AnWPe+} z{~S9(T>{gHv#Y&7KqWCMm_0)vlTOlYxxAE+WOE_5urur4ZgnQ_+I>;KSiU>yn&T*5 zF=@uT_sL2#G8AQ#(I(KYQ+v8DGx{}yW0D#%2ZQ4Healo}{%kztelC~&N!&_)A)d-& zReqMwPpjIQwWUaQP`TpHvQ|T~^u@})fYyc}O1R&@iCh%~UJ2SO zR`Tan827vLT_wG*$kfQ+jb_Itm2-?r#AkGwC{u~H#{V#$Nu@W{@}g*<;NGx~TK`-BPNQ-4`y&cEt6l!0Q}%3$3axA|c{~ znf(i6CwKposWwt6Y%P_S>j8fTjowDeUQ49CoM!pRM0WYLRRS6Ch)&Wkq)V0QAaiBp zvtsyDTMPv?-J+=$=fbUMuHj%24$RP;BLu$XP#ASJ7+FgpyLjKH6o}Nl1P&ps z#Rv1$cF~lcJb&-H`Yk*#wPL~!fIu1jKLiM{n^eh)Fw)GPw7l&ZFeY!2FZJbX7r!2( zEI(VwMHbDL_1dfx4~y=wE-`geR~T5x6UT|(5dSUYdjut)m4qVJ+14ady+_bTo%w`1 zKsTcvtQtR|hnpY#=`ggmzzP8N%5=idA~`zQ%I#qxRtHZBXp3eV{Gih4Z7l38|N#x%$ ze+4hChF4tOC!30LvF1^swi(JO;rBHgFZcKy^b2isvAyz2ZMqd3z=Yb;;`tj+ZFK*sE#Kv(k37ynBB;Cq=;_k$ubK4-Yu^Ya~0# zC>;B#!?96LHmJwAxosAP!kFEc9{)d%+Z(QLbpaoWTXChuMVcEz7)pEc|}&D zd&X`@hRjs6t_ZhVf!=i;8LJkoQ`qa-auyHeja``P( zk}Asa;atKT^U`kCx=&P+B}%gWrD(c*nT7o9IZXMtWz#=cPmn1-;x{*>3yl#l=zgYk z9;{i$HU3V&HGtW>*E~u2mlbKO`=;rh!>J;f=BC$!_f=$R@fQ4Mxch~zubP^|RJtK~ z(;@CS@At&+3?Q2jxp&ECCHxv#Y#1#OI2XZ7R=y-i*aX|D*6HgXUDRjQkvZVfX(ftD z-M`(&aLy~M4GTV$BztMHp(**bT7NNJ(9c-|r*lQA=da6P)VfRhyZ$FjTPj^4%oU?U zQd9<~Bv$-=2MX)NE~ynX6K!Nb!6j0O%TYFBSfdl&jyZ>Sku@5)JlpDg*aoR4O^14a zth5b^&D!B+?NOcwyV$9Vn6EV9iLNC5wdWS}o5`J`FhbpaLUL2EuqwYpT{!d2+*{&o z92k^qclbe4H`vH_5)1dBUzZskvx(FFJe*mQZM%o*87Q)X@7m7`sG8*4em*BUaL(sk z;qPMlG1Orgovze5D0f+-5pzpW=U?)X)#yH$`RR}5mor|gu6!tXHNil}>^DIrrGm%D zu|eAzHN}j#Tg4)(p8+l|paM|$%DenO7Z(J3UZd;P%JWfwYQsXI%!xLIiOEs%0eppt zy05ExGuZIXYNmmz?f=GR(9^Yd z{nbo6OsLAZCn3VJ5&QA`jMKzimyjkwQ)SJ@hB_GYk(T)Dh+hqzajl94e%i>H)(a zT9t$GjprKjyBcXiIoV<#A#->LRYLV?qK0Z38U6fXa$i<@xnG`K4hUL_3^WO?r>qOC z*9)gR9v}Q|ES;}D*0tI_a=(^4Q!_l8liPLY;osjChf+|d4Xt7NOv=2I#NNO3^@09r z4098h8aRuW6=s9}C>W7bB=c*WpIPVm>*O_&nQut^BK`kr?>(cM?z(ng^~PHf3sQoL zfT$FKfPjE>K@g=QHB@O*0|ul+uz&)gsB{PfK?o)E-jym{YUoH1LXlnqp`Dfcd7r)C zGsd&s=j$2c^;3p}Wc&?dqj_b3~{l&dp5w++qP&a;g=|4rwHGm&q4u}Sfz5pO)aW$Yy9@Zm6^ydtG!u-m&bkCf1yOKoa6Bw^Ig zQ)BvduS#vEa8>-Z-U>&+UU{17B+(d;ls_fnuoMo^ z?s(kY7^hs{=5?&GeH(>z3vx2mw$+ca2y_5Eu^=aqW*%Mk5r4_l^t)83WTs?>1 zo(KiB!`Ak!z9{AZT8;IW@ewb*y7fj)lHC|(+|=fJ+(x~1*|0Cqh>)B&(GxrIyo~r? zWzu*b9@&*~;3aiuZeu@lyT`g^;youkW4Tr$NG8wW`1fY=&ccz;pl|BZ{)P87yTbw1 zY#qL)IWqfuuKMhzcv6pDqBC zYL^e;7b_Dj4T>GA^wVd9e$$a9PH`+vsyEvlHk8*UAN^3?OTLI@dw#8WE3R%VdlNlB z^P@PtGSBsR!CPHJQLWB*ftbzqgZb^2fqoE?$ZVHD=6Bfh&R8MV1lk1Vcq`!rk~#6! 
zC0BhWbK0@_iEWS{WWloTO^dO@=3@AM=WMyXzI0B#7!mW=ASvC6cJ(=f%~;LS`!97G z-K8Fq_$8Pmlxd3n#OrvaXWq}%F5DlzkzuJF>yug7E#NWm@QEz;eQc#GG2CcN&zM~5 z+r1F@{@6_=ArB(xG8Q^VNXT{_cx)N5NauBhKbnmg_*C+U>^kHY|_((Ob8k6Jt~`B0!dxAwZ%uaEAPt6Fg7 zhO{V5bGlh*hJ`3fr~XCz1aF86Jg@yiYS%n#XE5LG$Lp7f3#V`!UutICbA0sSon*|o zg4w087ETWV-RKdFK3hiMgfS~aImCPrn_j=OE?|_?RHw&wNM@gK)w{@|%XNqj`05@( zF4%z`DkRvTTs4<#?T&v#b?0ysup=w!2K;-zxi5MD#EVS=;@epSB|cFH=q zovdhP+8jSN^k|x6eBI7~ERmhG@VPd26RvjX%Zz18uiQ8B(qzNX>$+WKst#Y!wI1-} zMFh-t87#(TPi7^Jl}aeVt_x$$>I~z6Z>S7!*-(DF>l;{eZ)rqv$sl&#T6?rx$Ng|V zH9S%lC1BH=Vs_oW+9t_O?~Y?(#Y=|70a$HGlFFUBg)2-Vb6(RQ1h6tW7p<(-1p@Xf zOX}{i4j~eX(-%M-*|VinUr%@ak!RTfaJFNSNpo|x5JeXwD&1%!qDX$O3!H4zJoxfo zVT7KrklcGo-f?BRgJ0`KbpJRXSDdD=_miwQht0d}f)i&H;ztD1L$o zib|}{-YW`-ol{bBMp)B&;fHJ!A`LLNkk@QDM9X|&uHSgD2gC9iJ>UYz?aLP`QY)%T zNO>ef!DWwPlFGgz1%?VS00 z&nW}$qaQ!|%VfH$CQ-}GiN>pnE&6?x`X(%gYg>0tH6@3nId@>JMbcgz?N(yYh$T7( zI`89h3dHq2j>gj?n178=l}irVBoQ5>jkbrk;yaFMZ;~v9X0mpk!Bv*~jkrW*-LIz~ zjclMRDjxFfa!cAb zlF~jz=WSP;I-q#!=PqpE2io3LuSe=G^I*5SFtXV)Tk6?)#WohGhqAZ?M|^tMu&ZCJ zNAO!7%&N_zj6m5{BVXjqiD&v(YGTBiAmGMqqAmm;LAKGptuq?e+-1zE5buk@`exA{QS`tT4DvK#dguj|m1ZQxF3h0NOjb3vX3I;yiQ} z)n-w@Xrb!tnm-bEC6L=3IB4*;q+e{dy?&2BoAK~%3wM|bU{IM`Cy9mEJU_9WpP-_M zc%gN)n|d1u{4qry;z^>Jg0;wy$R%x(im#W+77>^Y+F93LNR242ldY>c4{(C&KgMqw z61f*uT~)jMm#S>j1u1%@K+XY45U14SqECOpqb0O{M+L8q=d#rFN>n_l#RT-DGF5mxpoga;Z_sep%Hk! z7$o=j(TXK@EXJ!HG}W6`CYF)pCLJnWR0k_XU61AFau*J~KG^Z{52meXOMA>qM8B;}qg46hPjzpQbqq!UIeMhy%MJE6+w|EE~nMSCi;rAf)8O7L#y; z&FyUGoS+}ApD|BUPNNi233bz3E0g@AL$db4Z?2f`=Ell+l~;w<V*LhuX*m^=grT8e&UXn^I_F9pB5qFD$ zowRsXcZk8-EoxF$<1*c+g~vCznjDzZ{ndl2`2s37wZ}5A1^NJ!w+6u4)P5Hnll&HW zH~8&WtINPwjaRJF%qn|HmuO#|pt^D5wbo7WtfK54&TV9y{rdjt^*Aty z8kpA?*ZxrZSxr9MXJ7FXj-jmxk|%Q#_A|1%8lX&halmBF`oSTOcIvB&Km>x{Nse}M zGno{He?R*c8O>C#b{@n&hjp5>vpG{L9xJ8hSs=z#9Bg#Z2nOM5YBrNr(N9sRE*A;o zI=j_IUAA8tVy{P}GA^Y?P%7$}pl!t&bcZ#GE+fN=vrWtch&*o~LeW+QJ4{WfwpLW;{j6Qz9+ zgs+RShQVm6T)dB9&b8YixnDTG_U|mY6lFt~MnQ@@@y|7Z7ya$B?dFje00U5y%{`5{ zE=QNWZS9gdWfJH~KDk?pH)Uo8Znboi7bSnw-VQG&@y`Z6A^-GHyANPr4WLmZ{p8MN zDAiHKt8xRM*=YTD6#tZz*OVc2@@y#I$pFt=Ms8NuPYJ*O3ew~iQ2?T9KeY@hvu29{ zVDMeD^hqa;vPHPq>m8F3RlwY(G2Koy07&_4!huA_VrO?-J8LPwZben?Mil zbPqUao&$cM+M7+L=Mmr8-#?SXiPZL99M%oOmeQ5NTq!nnD5xEinJ+?8WTAC$lgK3h zpqBV?IskZ-xb*ekem$c06W9jvB2&r$sr59hsZs756Y=7BLS$K1bYKm50-oF|*~@%&Yk>j*zyp+>V$syx_^qgeUoy&zL;JtxUe=8i z=*T^p_V!3vb#CGc#d|X))+ymHM_~EfeEk8$1p$KDi+>RF4ZuCqfUoH{u$60!uDtxc z>#s{vv?X`3GXUKS*|JQ#AUb@*v1%d19Jp0pc}@30@qfMfy9rc})(i4Awf zdnStOHe>T7@VY_eK3Dp9((B#7#_xZ=r}H%6N*bTi|FZl=9U2xE2z+YGxLbkaOP|GO z@3?c@t+GdgwuNdGiuLsj+{f4_H&e)ZwH?r^`PAmA+Z>7|w-kN?hlH`W7%`wc;A# zL2looSnmfEP}qnDe%pgwXlEA!C=AZl19R7o8=-0(A)gP{0HH<(xIl`Szh`kjO9@k& zb01Y_sbA#JR(3`egR?EJlvh6tIuMP(4wQ*vfjg?$U)%vquna>L!S;YcoUo1ha)D>S zpB~DCXE(u9p^x8^vZP@Ke8c@l3e+_jS$V(u?DwLNuzO^{C}r9KxXI6_ZhUS97o+#< zi99;C{$TcDw5*Wul~WXY?frwb#zv3&{fV^>v!kNWmz1bP@eb=)^}V9;6WCdw2QH~z zhz*q4fd#O>lWy^eW1q6}Q+rd5rXa*^N}_YX_`iz=4tew;M@;_fOw+)KF6jlbNaH%7 zu}*)9R6i%+Gt%*fFKpp{=nAMSF9NWWk*GsG7#7Slz8HTH9|XvNc0>>K%w>r6NpMx> z%cXc$k8pvv?05Z-4&-gqhMOv~=GPs}_6>)@6GL?x`yXzb0~nrdlpy`N!#2#p6AF;y zT41xQhrkUByf3d^hKGe9fYnpL+lvyJXI+mEN$*RC40E(R zr5^rqQmh?=$#*D=GQ_9`i5V$Cf$#LgAA}sV!ue5|OR-!x;U(ON++U6* z-B+`T04N&yl=NQ|vQ~?czM6K~ld_Yq0Wt4Of&C{Mp~!t4XskenxIuK989F6Ftx`5> zpL1%0;<-(6yT9J@QI@OYx$fbvDia}o4Vv4uKudUXx0Us+$YVLV4JW`|#hW*P51UBMcCK?S%QVG9tpKb(s0MogX=CmM zfGG@KbYKNef30g2bFkgZ52AI|Hzn!A8}ItFHv6bq4uEiKeD5`IQA%g{-V-1yIAZ-n z+7C=I22xy8UQvKCzD+2LuVnGeb&AJ%14uANV%$gnE%^V3kB>n@R(Z9xFEJM2X@f>Q z8umXvq~W7WcryQ{gZ@EKEU{rE>(*Lt1o9plFoqT1fE^zjcIC)zc3^vKl=t+DzUP%` 
ze-&t{j`_k9$TKVIjIczgd`(msl{m0_^FerL(5&|rQH*D&!CZjFU1U#YI(-C8PtO5t zrei|w=R@SzlV<`be1q?1+U%3S51v&I%+H%Rs?Ve0r2M1B{5NH;vD!&|facw7bL4jG z1tr0PVP7_^EE<-TMcacC^WQ(K@(+sFtQ1WTfnusMqEmkD0qa;VrSPM{e%SA-=d zD0Q|uu&!p+e+2hC0J?bwV#2v~M@Oyebm3)}RgUofhx(V@3!5P{`CeTPEwgY=Wc3lwi`nsqd8l!9-QBak^)008|wrRYN4aGmRv~w5OR|`XLAUThGjIyW!JM zN^wdt7}v;d&Z7XRZ9kj20Sj=C000Uqc((50Px6>eEt>UkbmaRdg#r!UrGM(Ys!!I1 z6yP&pr^%QDtB)EvelMq^X0h zDT5E~mXS5skB^5xyK_2l^WqBR%^AV`+&Rl+{6dbB%xG3Ae%1z-VIqAg%^ri50XG4{ z$=MU|B~YRn-eVRhXoySBVb^Ug)2h}|kyM3qAO>5S>#D~n=-?^nwqcNe8YzX1W1)ul zY*a)0hl=^+IFmWUhYX|0~z=9e_N?iRrcnj$mZBeB3p6I&s+T+L#A z_$0LjsqUA%k^>l^BJ)SFrQC=Z+70L99p~b2jC1{%qn8M^1@0c6YBF`HG|W(UDsy=^ z8>ilOGny)#tr-Lb6e z=L-s{=RYdAdOwlh@Q_IU$E0cCRmaOOHW5?ia6E49Sh#WTGGT8gZ;j5=_IfmZ>p)Qf zA*R4x2+emnd-cb-(qU}b%D|Q5QGA_fruq<2Ft%HXu-fM6msV1QnFZI<_tlnj?fFWN zDi?+~#$9^?{r6)1?`)*X6zCg0WqX$DK)pigc-=&|XYOPcg0pESO_kr53Pd7t7 zKq^$bq7*_C?+@-$pRzvZG=>3U45|wAV!D~$q!q%9#)B>NE-?IO*wZ||x#XT8z-@MjUJ>`{)?`bu`N65)Lk(u2@nr?q2gM)o zw=W*Z|EaKfF8!MO)z|GCw5RV|7JgLSMGED)pw*Ykd(UTBgMIO&N?XCe5?! zjDW1t>t+MvFN;_4Tr%syeaAOs6~0XwjH_n}!Do5--NRIb_)qAO^+%)yED^v8It+D- zpEVYd*`3|ama7}ue7YQAzO0g1V`LCL{g&-> zkp=4)3eVqtD5LNi2Aks!;*rJ9hBsgxgL9M9*6jI0go~$}WkaqNPgkS2yfiiwkv-cm znbk^Pg_Dn9$;Ssh$JdGJ4s13bCqOx3i7>BLCO_0mEzSP1@%65v05yQa8a}h94bq zCrLux?{gr^B27Y0VlP-8*S@Yf6FkdtuD*DIyQW2EGnO^2!jJFVl_K;0NJzwD3TAWJ z#Ou2_Gf@Oda+%Xuz`L336ZtJw5|m`SxhGA#%Sk19E>meAu=+1^^C|ouORXEQ5vQnF zZi!^t`gSINoaR+^KU)T@+F2}poxhV%8KsP{Hf5}D+2LVo##&FNo-JuNLdb;dE7xni z8tc!mzv928^$vCOj(--B$UQI6xL2iDKi0A51J5S*wCj%{5Sp7rQ%7$T=BC=Abj**6 zg(Y#4c!+ZGj77Kepmc$1r)Rlyt-s+h|;UnMF%&S#^ewKvakn|3tuAJ zSBpZ}a}-^QRHNs#^;oSKd#aE3FvoZJn!bAnpm8tSMtebn!|<0b#F!utGk>kFH~aUp z)n`urNl0%_(ex9rJ9yNC zYdzyPA?v@Up=eEfS#4cDRqv;{eV^psTr?R(6=2P)%e~TuYFS%Oh zszOfWp!qDkI__2V3|?yANarhsO<~<%fR0r-s)f|!E;oI=A#hw&P%^vE9avgsh4k|r ze2+UH9E)oTj^)E9Xv+*jyeqRxg0M1fl#PN_>LoTdu=7h`EpbmdXg`%hzP)O7$DvSc{xP{?T+Z*?F~!=o0f>-_&b1bwvaj$I*5V9(MIw z#IMw^lHW%SSn#F(D?(*o_8crOYv>U{8_ehty9&H$jvI?qvJCYoa;gOCL9%v|Y==A! z_g7+M^wSCFqANLtYNXl~SQm5OQKR!_W}zI!&6-`$^%hK4!~;0&Gm%pL@P!@28|xAN zQgP`T>l=IJ_YPtT7xO#YQ%y7?;7~%F{b#OY)GNg&2*E?}Z#lJldSOFf#?_f7To?qT$f(-Dsj-@wAO z>kdSl$gHPdG^4dhDBRpA`hlN$w|xE_x+n#Tt;|?72TL|yzLsofEjA7nf;dg2V#+TJ zZoaL^tlvkkcYg1|Oo?%%VD{OsvVU%59YvgQX?jj|QzUEX+>%ADy!X=i4UD1T9;kDT zo6;PxpgIi5wA%aH7lL|ViCD*O-CE)^=wfMD^s=d5`%^Pbn06A_ZVDXT=Q*{ zYOtn2ZEB%m&rEYQhO0&a$(I-`i`Zd6l=St=EA}X6NHx<+vuE$xwV(dvcme( zmGa2Gg$70QUk1ia{QWT)w4#)AJg>eOf~L7p&Zu<_-aI@Y-m+YV^Gu4EwEwus5%O2Z zeO)C4dxaGflXZu~&bJEQDf?9|Rjb$*g1qfjJ-nX}xAcXzw=myNft34@6ppK57?riE9HfbY6 z3&~1@`IX$F-1~R;o(`joBZy-uAmko$?TEWZl?Qy21DQw@(WVSOpBsY`T zqsorqPaS6TC{H_2SXhxLZS&f~>}!AguFW>?r8Q&sRKBw240dIPd+vHHk|8L@OP4LO z`!&VaWW#&KbXllOq4(+4az0@qx*p$uBu1>yRg}uf=RW9CG}=dZIi!8hvWZ(j%ko-^ zCYM}g^7{r#HNV|FOyZ=2$v!pgLX2)dw@l&(x2{6B2m_-7oV|utHSW!1gh{?1955ADOc2}(vu(GSAN!&tw`3rA7cV)Pf z{Eq?8;!a1-n${0l{u*GT2CCFG^nNlSK+cscz}X)ZfJb;hM2Z(&A`? 
zeTT>@7@dg6anIKub$H5*5{e;5zpKI*V5XSft+WFH$^f#G*QWu6oYOfvJE#c(E z1!98q)HoHGp~X?ZT^2@DYlpqQ;S@`NTT|?Gnl6K+^qtlPIzs8fOwA7k_$ufDIdnBG zKH~wGXfs;-h0;wNq27x*+j*dX5}L`|KY}&9GOo;JE!8#AChiNtr^kBKqG>e_v4fGr z3QL~!MRWVDIze2YKMF?M@(`uQBu<^8S5i%<6)+U#>A0NxdM@OnG(MdYb$ za5`0~rA_g`Fs|LsTz3jXJwUadBQ_zH^}=jnzLlzq>!k@H9+?NaWCYvl)skgbCj6ry z{By^`mr<)3n>Nusamd^K12v@?{b*t!` zM7}( z%2*qIIbV&93nk<(u79T`rB%vI#!9e(Yt(-?LYL!h}bWs3Wqi}u0!GEaSj8e9` zU)r+dg7mXDMQS#-_cj&Tj2%@j4S#YFPP^R{Y1{AV5b8uYkhw5=W8fIHkW6z*);6x@fJUR7$a(lN>B3@OOoX^ z6HV3$POYivM|OVX)kxfPHt7Q=h0NoJ2#%N2#{k)NB5q}JpkK#R9cJ_@!fXi9g%p1Dp% zp!S{vbzA$u?0g4&VMj7SL|&k2(}*?UtJ<3ju7)4VJr5hInZQ>bb$h7<)m?CFtL|GL z5-91??S694YOkBRyGB=fPkTRV`$2He?JYsntI7(cJv5U_Cd(Ifqrq!p@Y8V;;N@#_ z5)V>qA?aNG@n`L_13ls7`}Jyey1P|4+{Fy}AY%DyPSK}y-^6sB4(;6Pb_D3~jokD; zsoDLuNf7B{?0{J)3>RS*2-h6p?C?@a1wC#n(W}&AxbvV{I)$AB?!*)}dWIzzXewZr znW0md-6k$RUcU(VzRlDm-vm;LnJGs|1We}bKs{5q_E^*2GD}XtLeVGtb*xeOzUbI< zM=x;JtD{1DeNnLWy0&TLPi(lut)_n8?)KQkoS`T9gW;MoOhqfrdUngZwgg$;e9`1i zPWH(ZGIbXt$Gu`vZ8u6+%(2U&0bvl}>4sA!?>yBV+Vbu0%Pp^i9$!nmeNzUw=eLZW zT{sZ^I_h1WRV>wEA#(B0p?eN&3y4@fN70)qM57u6Zrhvl5EuDx&owNLQ|M~^?sn7d zXfS9%6MEzG!EkDh;HJ{7bopRH^yjW#BlHjXY)BFLFkV+eplP4d6A{{bcv1Yzv)@eJ zHF4^837hg#u(mewV4kqIQxb(LlhNUj4!%j_DhV$3FMDm&>$mUykgtWxCz?qr|I+pN zR?l;((Q^_^>%C3+8uW`6PdAg2p7e>W@t4%=qg#l@hq-VH6Gsgt(Op0IP3P=-x9stO z&=eg^Ph8-{hQ(suVR3_zR~RO|F28uQZYsN-s6)LD@Pg|O)(@LFdWo$ zBuTTmWT{YjsR>@a7M^eJ7uEkYZ#z1;sn>riolI~RadT^ECqxetwUk&y_=EE-Q8za? zGeH+9Bgib(b7?7B3D$y}89lqskRP2N9@u5NS5O2xK&4lW_*iDE+jYAzvr}Yzo+X(Z z*2JtJJKaj>AZ%fGSX>G-S>o7X`J3Hz;;6xS0_OsQ`LGPiXp<;=_6?FVF|6A)Kbop4 z%Ht-tlLu+zk9R~jLrU%7d};XUnp9>CRL`FftWRq|JRRjmD;7OL^hm&WE0qpXc;I2jbrd*?`q<5R zo|wCjOqiCTvL@kWKcu-)o<$e!+%Qhx@Vbs*BkfEqLTo}v$lA*7E~9)+zTdKgb)780 zYhYn_>YYKUoi01|9>0L`fi-QOk_{eoA*Cst$SyahBkaj^Q~lt;G06f<h4kX_l| zlqzngy?xR0B&LU4nYTPaAXz&W58Ct}jkr2X@Ea!g&wv+c#rSmjk)MLLK9{2a&3lkm zXYn-trMZRZDSm0jV@6hUdq&Ol7F{b^eMd+Vf(8@x>^nDX~Zz}n0; z=35_{tvL>I;eY>*GYvqC5?M zb%C=yy+fbos(zUuJeMpeyNJ+Hgi`&st^j-TrQ01>id<4pKXBMD!ijN{l2(1Op?E^R z>wsw3TNjgNxE59%JvO_^>T{6*1RN@~ zlJ9@G^uK3jjbd>&R4-b*I3t=rU|%LBtNAI+vzYut-cOxjX1_?n;*@jqcjr>+i!bI- zRF8-B+@zLp#pkr-TO1JC__AV1ixH#R=@VyIUTVVJ1+NDPzy7EIXdm?v{-7zTzQ+GL zAU~0sb)33dW699OMXuEk*DveAV${O1OJEXc${9T;&7@9D2zu*#oz9HBlhFhE<7uQ% zhri3)dac(&HQt`u6yGFuw_2KO#lAJY4Mt@=4Z^=X!tr^4GX3KjasQq5p~LZCrc0w5p0CM(+4rFELY_M-bY07D!e)>Ob zH2#-_fg&fcRR`NG51+d+iB*N@IDUJ&Fsr~rXEbbKUbUOn{JaBOrl*#4iPygtZ8rX? zIaKbxSy^68LEpjfn`nqP25dlXxy*V9m|g{Ps8bQ>O(_a)2Vn{d>5ta|ht)cX?ZMO1{P-5FI3YU3Z@44cAx@jTx|hUK%2IsRpP z3R4AH?<8nPb1=aaOAfwxt&X;u0D-FYxH1-1T=&D7G=5J;fNs8Ym+y0qU>4mA3d%ek zb5Fx0z_Wb9-RSCStISTJxsNe?H{?(rw39y8CB#w3J^Q>4P| ze6%$i{Lm4RE-yt*_d8*9^2Rt2Xc)DOIM-;_+b_$l?1Zt*6Sq)#5dTpn zaE^p8zm6^Dek-{;8wA00${;Ya$r`?^mWKC8+*bpoyhlD+{Dp+S-cfZZs_p>!n{D7= zq;+bwIKNziWEhK-*Na+~`S4hhNa;KGX@I$QEZ7Uo)G;XghDy2Z9(aIf0=>})g?2;j zwdCCpfvUx=8A?#ZL-81p;qdBs8Sh>nrfK*x)%w{#MgF+gRNgrS&!4@nhO|M9$gr#P zJWmTxtE6LlvS!m%_$$8{+m1NbAGR>6AfXt#fOPZH5B7Ms-?Im04u-ZlGgg%?M)N9- zo=ZG$iyW=&c9;2caB6M&akf5UuBNwz&ax+0WQ<|$I}E5jLTk(XH}c?A79fexP_dhMurZ7a+CuvKHxT-6;8UZ$a+%88bFdkCr%Ak*p^@FfJOQrdK@ObA5pR9gJ zie4VQh7@9amS0}Vxs;o$NLw}ot_NL4V-WGdW`I3h*XM6?5=*cJ`)yaINLavWWv-L{ zA}=*{CM($orX6kY#OPq#b@G#~?B`Hi{#87>S-XDFT9!SzoOMuTYhiQhpVio|)Ub{7 z=@lTiE$%N3S8JAo*cq`;g3V3Sp~O+O=)im1{#8shpql7tFIeR5M{EPt8GpXR^~bq? 
zQHG3w4BIvTT$bdQeNX1nhYE9Pdz`W3&MRZQw?y-kl-W)D_lh^Btc|y3orV2(rmK-o z-uZ3|TnE>+Q(NQm;-ms1+N6q3;N-t5mcJjV~CKoO&?8UrUdbw;!E zu?>oObfpu6w+X({>Bg8f{3bn}>*U+M4_~_k+84dmJ2`n(A$l~XbT1yyzW~C=SmJWk zthmRP#dT<^-Q~wL1iFt4t6*OiO{@AwdT(&Y4lsRP3+J~EHhUu45+ciRMK4RQj5&O> zmv0hL=axkluD<%~;+06Z3xi?9ZT27el!s#iAhT9y*P|{$o0PA7QAF0i%G-&oD)TD$ zJaBKM3p@a=_bdHFfyj?PhKT{5?fHBPcD)*ST^oDh&ABdS;m?r`ITyNa<8}GWm*EV7 zo<~TXiKXPBo4Y_@qtD-BnD`{n#bp0}>1VJIDfxXVDttw4f_IT=06FMh%6$MRtSL5; zo2LYz<6u8{(6UH1ZDanZM?ARHmu;oFi>_kVp&jK>#TAO0ATF8>^mNAy+l-g$p!*er zuu_jzt?W8RV+oPp^^J7MG7pf8XV+h1e9S)+XlYTu|thZC3d^jNGJ)gk=NYJN)j zh}x|C-}{_+o3uThN7To2C+6$467M;nvP#9G8_PRD3@*RIEMoTI1b*(a#To@bU1Z1# zehAV_(G(!yJ;OQ6+b;Hv2@xY0rnm)}!cB?n`lZ=}p`B~D$l_f-zHHe&q7j-bU+})N zBW{ihz!FY~-|r;bWRFIR`-$NE_B?7O>K=9$&^_tcy}^Mk=sfxYO_Y@`%~J@7mk*b_mn8el;*U5-Xu1^*zwL4vEd5T{@lur~En=!ep?A*sku1{hvDF3`f;brC&cvQf^@#J$lg-6?th`iuE>wEI z5HC#(w2C2ux~VzMQsPqNvocI%%5ZEx-iG>)k9d8cTe$YMA-pS~P9?0IaRs%vzXmD0 zI(yGG7SY@OthHnkS0gbu*8_2)RWdu{R+gcgU_z!oTx*UM0A-rN3Rly9yMEo%tNK+F z)rHp-jvad*xwWdR7+FY;_1%YhkZYBLV#zA?ORZ{$XbhVU96P~#(d<1%k}8mI{PvI1 zoRl-KYveyGy#iZEgou~yU+^BVH-cOav~yymI7_4>vrMqbQyZFKI=J1*(E~%YM0<#e z0Fg-;iiX5G0VcGdfag5p`@A;$PO-Z6FhM9X9hE?lPH|BI&DK(D&8ybFvb3gCMFt}A z>M>b0r|o=>kXCrt5Z(wIrCGR<0rH6D0>Y}Y_;L~hiSnRHTzmVA$;|KN@gWZ1&LAa=sG6S;~8~3++vSUkX zxz)F;=~%Ls=c@xwx+S$_0coAJywQi~N^}P`!NuZ>u|ac|%y4^(FGb}M=q72|uk!K+ zyQY(>jzJP0f+85VW|qWGr#Tpp7*&IDZIP+hS#O~Nl|K?T+a7*Ho1$eP|`VD);LUPWvM3KLT zrLB*aGZChcC6~OVVZw|Ko2}gHO- zRe^JVH|Bcat<6QW0!Uj4fAvO>y_KbuHi|^zgGkwcMCPLYVdLlbtcC`yWn`IOA0Cn+ zYCSSl0{usd+liFRU6yecayK*`xb)rIEWwPo80TQmyZVw9I$88ipHY1#Bw2sp{kE&j zvAvHPnki+6URgAlss3Ne?f+chy&ie|>?ETEKi$uFpX`WfbIOrS!qIlK(b0FkgPJW%Y0TA{)G+tX|cBf@cy}{np z+(Zkd3amKGq-S6rINIypRvkA5lr!c)b^`4)dD*MzfMqUcIgjY5PMv@jeZQ%gl+^{T z+a;9Fe4Vkq95gg3UoxOK&Pze&HURXhB=ofBT)a()kH|TTkUE}_*$||qp^&z4FZbg+ z6~P!X;F`##D4jB+J#Q`>M zGf}wv{;e7yt6PfC;YQL>R4_rQWvR89{`f1!Ur*6&oyTvdlJVR%7b&`DaOe%%0FFlW z^0afepQRu?F6$wNu42yg%6eQiGGu)D{Zunl80Ai1E^?6qeeIosmJU4ibG6&N6R0ma zXqMzNWvV&K~l`heK@I@SsqQqNx^)=ymPa>9pjudPD!m@pD!F1pDL%Ju4@6#N%S+vj19P zCQ&P(tl6EO7&XzC57JPMRT1yM7k^_bmrXmQGR#=scAjo-`o`T(s>!3B^7j>R+aSGl z`I0q>D46w;RMen+250;$g(A9+M;&F;eJTR*s04*J&Kgtd$7VvLo#8q8pEf3xYB!VDutnk69X*A&9^&z?uW!o{@r#q+&vdy2nXrIje# z3Lk6)2kAzx%^FRShh6~bbjd}Hukmuvjt=(Nok~p&p5|Ll_s1FzU0MZwF@lZ>jjxQ< z|3&-wzi(jxLRbA}3@G^Y$jJ@* zN4^5w2C>w8X$xiG#$6y5S+h_06(&=y<($VjDGH~#;8sNui;uPPKM`L#Q$ z-R<>F+;k~g#l}x6{8!z9#%gqW?Y|bWSM*MSBdzk3BaIDRVpG>yk-O7tlp_oxLk71f z=fEfjGiiVrw6oxOk}o(QbL+vYz0I>abg(}kBRFwNI1ZemxoTX=aN$p_wSV_Z|6lb1 zCkA}A%|`XypJvm4jL~=BQp(A1(JH-vZr}X3PR_qSKHH#t)wku#f4rnW{@nlE>c1c2 z|7oL=@*@A|t^Ro>^sgxOKLhyx7GuH0|Nr4!JrLkW>G$k+L1<2Ze@_(E74qbtz4?Ct Db|2=2k%-Q6uDkOT=%a4JBs;O-LK3GNWwU5cWRx%FOm zzg{!zw>oeBn>Af)6?LobJy*`&XWRE}!rmxJKSqCs4hIMKSXM?t6%Gy^84eEdH7f8I zF8}o=BODy2l8w0d8(DF2$~Ue~mNxblaBwnViCQSyYW)P6daBQnP$gc;?I>c$;J%VW zWxj}_WO$B-EfL=P8(gMl>cS4jbz ze&+#h+ief&knME0?d1kUxRwak#I3AUxD9!9bC>7v#j-C8nN+bT-&edvj|}ECqD*o% zHKl@M>O5Q-+$VuksnsG&(0w>~D3g?@kHdg_WlA}MJ@5#EO@RRCgfC<92o5_ph8Ltm zsj=qz$;14o*eAP0KKW3+L_YCQR;+pIE|=$lm~c87qe&LzIF%3o}vG5RI6=cU;PAHQ@#Vg0bF=YfpBb7t2lj1! 
zv-e#*Y3Ajy&>z8mh-4+X5G#DpiB0QM*)^M=qgsv;+0stPh{0sXUu%EuqWZM!8Mbu4 z+j{3m`|Q(9|`=2)AsXmzDgd@CHuUI#arDiwmUiTe;_m*!;dr>KskkdRG z<4#r}Y`Jjkj zMRZ)|(k|Iw6WE^yg%9t8 zVv=8uq0Kmrq&P%l7dr=S+tSlUJ+|Qnhs_te`JqXV~oWIm->p4?@=HU;SURE zvc=${d=ep)fM49#Zz)mHvYul~P_T_QK_TU}skwf;ThMXL=^z&B0F0L&4b1Q2wkK-Y$3) z!Z00rX?pBTHPWtAkEs76%bSX&-Rjr*>83ZnVFd4*)k)DcVl&o*!dDzjv@0Z5glM*~ z7|CQ=`seD{p4dC9tkp^03`LxqTW8OZ(&(Mi-XRCEXQ>8}?1P zk`5A+E$goeUl>vyT%G3r&OLNe(N|vcMOutO-;WLT$`E~pQp#g03Ob|bqBKEYtyP&` z8YWnW$8e-o##LsFziyD&&GY|it5Q-ZGKpmS!~>@$ZvK_9e2aWQZeE^c-c}(&5v`_P zfo5T?@?rBlVvX0{{GR8Y{w0@3=neX1#va|2Q&C&?n6)NXkS;^X+uC=U(t?Uj%6>&Z zemYP7vO(o0uvWL@veh5+7R&$1yi0nc?u*m%E-)rAY`I8wta^OI_KbVmPScKjthsQm ztox_ybolhZjBaU-{QkRXPKUVKM9-MoH-2GPJckqW(L_X@=|3z`i6V#>h^>g11SH7p z$TF3plu|~>GoECK7&sVcEm1G&G)^`4x;G^gO@#NGZLw={V6i!|twggw>m+{htjb=Y zGnvtBdFS)g`3&A};@;s@Qz_UGluPjqRMe*1Ci{>H?`Uj=ZO1lYT=hfe{c?qXSxYZ^ z&zC4+wktyM$)6)^BhFE)Je{7MfiKK*uwN98A?Hj;we%>M1+9~=yRNVI2-9NX8R1pP zHuR&#uV^c&+ z#1zMdeI|Q(jZi+EW`SeDwPllkkXLhBvtLubB(*qGXJ6;MIHGvJ(`Fy&CID2`F461MQ%7)E*wMJDG|bj>Eu?#@EwsQU1 z7iTynIKsmh{rd3`ra?M82TkQ#BTq@svSh|m+_oXK;kmeTT3!J!tD{$juFK2*u7SSy zIAOKaTCSFjrx!XCg1J}YxHdDGWyJfI7yTU;f*YUw!d=xk?XH@>e&-|qHR+G^+J-zg!r=h6XilenfAhee+uBE47+;un z42desJ#jo#jhuriF3Z>XM+7n6*P6|R zSde@PV8Q=#ufCeJ|WnANQ8&2#J8g-;9V zk~Gv-q-z-J-p@4J?6auXYuf858t*wUJ+|KN?hGI9*T5X4eaxI;MC>Rm?v5^c}aKO`l{^i`9 z28otm-bhZVW72AA;-z&p<-Q>JW5uPlD2cRZV72 zVb5poW4&(DvpO#I&BfEqNX=7C%T3O0JGT6V1M@QT$7liar8(Pq-2esf?7;fux_lH? zI=#SHkg9JbSbe%s+F@e;C_U5tWlgz4fH1mP|x_4rmZ(AuH%)jFMW4|L7G%Lz& z;NCVndo&$*ZB+-t@~ZdZGCZrF^q)I-mEZ0fS=YKPO>cbWec5Z*Y)4ooZE)Ni>%Bfb zT%Bf67gCqGG}3f;megS+vZ&*o1t~n-%|OaTn_mVLJ=9*2L6;i9{l}odWJv5q#aaAe{+H2J z(D9QEEKC^)$NjG|IOGKBGzGZ(MKn0(M{vkOysC6XBOl*CQpvDE!MRq}_V_@`bOguM z9&|HncjD+u{#_mv7MV|MZ+Fr#eK!Oo&p)G5vs0haZY@@;+(#QYq<W{fb{k98<#2J5A_}lqEk_bg zab>S9>Jxmv*Qey052Wu}MGl>-@7#i|_M4!Y9@EQK%G+R}w)5th{YxP~P|J+2UkTXo z{N|v>ayOUu$16Al?6+`8lyLBfgR7TrfM; zTQ=uNLt2&Kzj?pFwJ9;FAOBz6YtJXRj&^q66p2TFf0o{|#c=)g)W1E!L@$8(>D{rw z#s7VQ{Od9uug3rW+`KZ51m=e{CrMWQx9{>Vl>vl__+MfE4>RyzVgAQO_y5?+e8GAi zN@yrk+Hy6&F=qu*BrtFvGcRq}3ZGrW{lkhzVg%EH{5cwqTcMRZ&3hVUa?4k>bshxV zQ!lYZZ=E?-{LekM=*Y!>+wKO#3?`yL(ET|mely8Pq+u)7u2QhfXV@-oJxY)TvbZfm zIJ;q`hp=DP7U()E43=ooasKfrUKqT{=^GufiR3wJ@^RyAkd?KXx?+Ds@L1m)M1q}c ziD zYkq8IS@YRIe?>3(f|thHs}gD&F_b((1mM!Xt{bt^Qo{PCVKi~_tSwxBJk%91`5PA# z%E}{r>yZWB7D)y?{QJdK+2DHA;6j;S(3OY#x7a#Vr*+Mn%!w{S;6=Co!u-rs(twMx z{xG`C*=@&Ad`+3w+f7zFhI7jdkLisb5+?-$9rIWbL61PDsv(Yk$TrBR7tQnezpU$j z@RG@Z}K(k7vKVu>BhVBtO^7W+v#yXEI34zi(Z zKTZELyW-d~O!_uzdeL8R5##W+^w|Z$_YZqoZE~7A7WhGh(-1KfvV`KQ5rLXTW8&X+ zUBQKt#sjE`PK~>H=BuHEBg>8{Y$r>{fnKf)4t0;eA%qn#pJ#$G^^b4f)y@c?v?EqA zGv&Wyd!uJF0_a+Qyt2sLM_dzQil`n@;K57bYf%wN*1J|w(bSo9nlL*qb|>Po0(Qg^ zeI!N;s4yp|+ZXJs6f0*8{VpHLFN{(6pZw^*Js(~1S_-tU=qBiQY&x_QwWFZ${uOM# z?6X~R_ypyDu(E$SqkkTtK!%|9nuz|iCr+LvWzo6w@mv}+9`gN;w5VL|vhRND)AC7G zc^X0kca^isq?Y1i-x<;BlG57L1fO-16+RkGOm%%TQ-zRVs)^+x-WC5a35jJ&%nL3) z5GK-I!k9s*R#BC0%{XIOGsoU0o5`<|y5&@VV0}_x$2m0})`s0%0*j!at?_zSIY@vX zdQgRNoU|0Pb5cE_#8}lK{QHhQMgh&_7jGM6c;nNHs6HbvKp-&d6L`B$LHCi=Rpx9` zP1&L&ddX`=IJVt30XFTjA}7IE8_}LpxvbCb4&_-v%$OpG}l>r*TKy!nHLm3z#@=Ehdy3(RO34~i4eS`*pC$FrwB29YTWC7q1OWyLQ=&qK==yp+nl6H+(Yki3 z-0Nbxu9drC?|V_4&sIvW5jkJdPp7OO{c8WTlN3I+Llz9Y3fVH6)0V18-=R&5$rOvyIlS1VYULaaVyLE*xlx zHD{A_BBx>Sv9f9=zgob{X+yA8LFzUX+gleA4_HUq^Sr1wWua4vGeFI;0*o~3a0Ac! 
z>8cWI_d>EF>Ioq#+<}ruO|2{&MU_doVqNn^k1AZ(4B5>y}ucYg>KnBtvZqIexf5-q=cW z8YwQV9AN0TOmq0+f89wz?(7RVWNOg|$Y}f^YnxERLFM4hTCTTa^Jxz;_>GVTEoOrA z*-0msvXbcC1yhu7!SaK^esR1%;8c(G;rOv%dSyA@4vZ(P6(^3HS9{bfOUKTcMGIFZ z4x55$Tm=C0U+JHJL7gL%=hX_Ve#`7K=cb8&Iz!WC8Z41(W7-4(KqcaxG1Od)U_O+4 zTKk231n%>wfrN^9R0SKujv-)X=`hMPml9O?Uu2aaSy4dxq|bh3xQ+1lvosyVS%>m) z0-o2sHnqUpJExgCPg3ew-bcJ>l4vZH!`TpO8?f|ze(y;o0q%!~0T(9W;>A{j?^_Y~-Vn?st)Rdl63wLw+ z-Skxhpp`R~ZVqVr=GAe_%YMfhOOC2ienE+OGr%$aG{T8q482MEuCh)Jzi$5s!i@Y+ znT{BuujcK_ibcLNU``IalQYV~5#JfG*-`(7Rz?+j@f55Ts=N5%t zLb$(ED6VOJ$$M(W3Ao|mcE1Z3MM)>in&af~MLEc~q6o?3{MYp&K?l9Chzv~rMaMG5 zkfEKl?<0EJxIyyq5Qrtp@2J7bNLvUiw@}@lH>S3kEF*uR(Nb*AcfSPVE>1Yt%?Wto zW_sRpqx%mQ@k>=^3`|4K>Wvk46*&V{?)c&l*RkpOMTl3bV@5{W3CL<$PDc9OSAcP( zKow`W-*?+z&-Qnu#b*UWwhvzpC)=n>rRk9+erQ<{&}z6yYP;>$BjHo-ew)&yE;EaV zM?9sLWptBKO2qf;Uf$?Nb@B*UkC};*UW+c$PB$03E$WjZzov~)2VVt^f{I8lOsd%Y z1k=TTKMp5?cNkunzddvc+Ly&PMLi`XQYrb=f=zzL&B2`8MB-#6v@+h2#%9EKH(Xl<>dK4mb~&YG zwTjwJL7)^mdOtpud-Lw33Up^rS-dM3(2!9xW7u@u8p4)EfjcEjWIlS{>s~at<59Kl z^Y9Brbf>DwDwomC!bDapbH9lWygT6F!{>pEhdl50fsCe=q{aqMH1}ohC>yyUQNf8k z{Zn*-VMPTZ?;-YH%`DxWJSs>H>LOGASg+`6cm$)OpgH#3Cpxhro%?2leo6d7PeOLolvTn0kW zN$^2w&<@#4<(qA}UzYE>NSvB0jx!H(i{>9Ay)Za+ZqGdMHTg=q+XAtgYam4CVb0lX zLn_>|@>ciJF{l2<4Q22fR>wzP_Lwnf@H@{xC~!5{1i931YY68t3S0?0mDEgs-+Su! zROjN*Tau)fZOtufQ8#-VnmxeLyU#J)$u$DIw4qUwCpMde4!6(4}cObzfNFz*5Z ze{|Cf1+t?_tKNg8FzN6vod9yL+ZVJ7VHFjvhZ7Pg;4fc<6E&sw=^lA&j+$lfGo<?0a0|p9C)3#rGd&@6>|+za z5!2gRcw@3-nz8?o$4flqp4!XF0rjjfw1!)`pvq&du}Uw~UJTY|wsY6_k9l_dyl`Xt zWr~UM07K~uJ*)#aB(7|UoKxQso{L|-K8-`-A>a{|6XIi-(6}{SjY2~-D>{3bnZ@>| zgEpc~)G+AoGJw9CA+616JBzKnFEhk`xvR@?KGh?^azc^!bB|cj2j842Y39KDf@Z4I zuPG=L{%p3&uiJjgB<$T)x+Mw+;`8OIHEl-^bgoXWXDSb;wYmhpS$P*&8Wdm=auYu#kv-YDyK&{ z*mhXAB0@N1-Cg_h;Y|S@P84B%-+iXv(e8U}AjW%zpGsvC72=S?X3$+GZ}JsM+?3Q6 zYo+KEb#Bmr9y79}{iR>lCobC5DY<|gF5t9@_{%^P_y|$q>Bm{mO^xKfV=5D4dXb4Q zi>uI|P9?vHQj?99JTDEUO-vbI$}thAW}iWZI7bhvo<(d&5+F0Y62041MoBL6KYOQ$ zQD352c-&SxHU`xCt1 z&70v-6YzEQg)5md5`M_$Y?unS)wOT{Qm%cQ{Wa&o?b+~P zc3{T%Dz1f2yYM8%>^)kFu0392PBK`sBhRDrSa$fotgG;%Ud;z<>2ZCeDoSQM>7=-K zKB&krC<4ME+c1$^PjWnDh9N+;*l9IFgTw6=fCYhk4|oqE@7fMFbd#?V4FsY=`d&*O zD9fV;UQ75jMTaFVr}wiE2~Eyk8~(N^r7kBWjI zbYr<^pxXJG>aA#0M57K7;?Usvck^wGg3)1t6hN+1ur#HqcbJQMzupE}jjj-)&?6oz zdu^M^^D<}HAu*>UnFvoj75crSeO8bF#b~>4w5YU^VLae{I)VK+NZJTy>tEc*L{py}sxOSVemc zu28=OT3UrzB5KR<+NujZ`m`pXIw3G{-i4>k#rQci*w^qNfPuwh*!(H7Cvh7KkX{lq z_XHfbfTK*yUSM)zRjdBevC5dz3%H!h89f`7Cc4J0Nw&7n*1}A9vjEJ*41-_RnCGJB z3Poq4;GM7_sJNpYmfSw0!|3FNz z-MQ#W@w<-?k4hA*gzp3xBW0)aVcugrZW*?7KOs-Jx&1SDDIv*^;R<9)skLZi?0}VV6WW%2mK<2=a@fiTuR~u`boZnvLe zD3XT1e~zdD^CNR{kQ$ltFLImN+H406ans_ z|2U~lML^!NP~nfC)NH~_)PQU+C|(Sq!{1(@OU0+5i*&I zC=eA`bxT7DXHIRj0%>g;Dm0yKJo0gtKjffvA zVtnKsu~qt5ucRn#4%-P(EDq00e9XJ(OnDmw88piF;%YD zV)IAyX|OoA1;|ePc8r*!`&JUQ4Ef#c79RGjffPlNcWF|TM=%;fywwCRy&L-G>wfno zJ5>K~I0i{1T@1v0We_>OJQFMx9u)#O+}T&RnjNm?i>`x@XQ4pCHM^Y@xRXj~wWIIY z@)8&Dhi@v2@R9GrLyT_5laOx{S8}*;Jwjc^oWfWvs?U-%qsG6#jTX7FcF%2{Ww7)` z$!Y^i5z)|WDz7ZEI}xC;4L#@%d>Y2Nx$LBMuw>g^48R)I`5JpeU0w3HeIb^BEIxg> z1W*Lj63?@yqU-MQgb;2$;77-Xf7)^({)dZrLGp;MkRe@e{B!z+Lhqf(2W)F z4g$c!Tj5&Vz_2$588X@gso94uM&7HTKW*vS5?4BJ$-WHxz9}Ysldit(jdzo~Vsy8s z)slG798Rx8kd`nu|9vq6At2e`kQi0{M`-rp+{5j*C{}$;!MLdMb_JMJ9!s*wcV4`- zel1+2O**6Qpn>$am5oRlp#Tfz^+nJsXlFSvM1+tA?i)RymV2{|mGSYN=$xKO zC_eH*Xwepa$vcbJ1anCbhV(fa86DgE;DQjraCrrZ94p&UhA`u4qo6w`1={zkGHne5lg4MPeB?Q2LFmNnTUy@T_0g5pWC&28h3%t$I?&L{0vcU<%u0L# zGv@wl4CaKDx_By;#Fb}r2c9km41s{AZ@#*HYLu(Y3FP&??Y1U8)cRocyd46;K0uqY&*u;nJ5ND`1u1X^hd{fG|VW1jwLsL0#@s%m9nU* zg$n(bgchKz#&dJb|$|%u|yZ?EMxdOAG#&@?pq~@ 
z8B3~Fk3Wz&_PuWapusE1bEvLjzk)(Kj-JMwwfbaZ>3c{CISghSvJ3m{KYI%SfCGo< z28*W;|RLp2?kzO zfeuS5dY%clWgG#pS-KyyiqTNU8d-R5HHX&F5lxbQ9CYSVcH#P^kTkiEIoBet{UKns zh(X%T2dP91LCML=`mZaJ4j3Mjx^&^R0KjEy-Ix@KB*-T42&kO5z^Zhe8q}Kc24S*h z9g64D(j_4T3raN+UKJPP^3v-#jui|QnON}|c8Ri11Dg8P_yWWCJPY=Cmuh;feroED zG7^NTR5)8eK0>a0c;IDE0)|yM!A1%luDXjfk&lOgA}gLn_H9Wusov?-a%fbVQ>)J? z()wAGBQZ0tq~6WI1FdPJK*uZLp!^KaXU5K3DOP)$mMo1s*}>4&y)?_2sDTO1X%f~c zH=_Tk*6@E8mVPDxQu|7NAZbKviy95MovF9l9Sf%&=xC~*fLj-V;+9%JVhvm9U{>@3 zAh&`~k|vC}@pDx*-%}&IIJ0I)4t5Gk9HK?cB0>0cFt@(PbkvrZ;n=fjs4n$ygspXnfHmnX`ywT1j#qIZS78-UoD@3LZky!*nY%4j7wlt0Bh z*cBnSJ-|9Zf}Rh-A*aEPCkwk6IQvj9i&^L@XYqQjNrOD@lL})N3|kK!ovy7Iu*>0q z`4{L}!CE}i~+M^*jxNFJU z;t1qTj?!US9aC*guagWsb=)nu+!)zoLd-0r7G?;-f&m{>f}5vtFtn0wW-q^9X*JW3 ze`snpn#gxl+H}NlA;;K-A7;J*>WZqEdNL~daJzGEYI4HpkN-&Gbtgw~L5Rh?=_H9k zgz%^ltTxMlHq&LCkSBZV?_GVPHD+szi!Zz51q2hDLS2@C7!GCMFv2HkzQwGgnyM|9 zw9z{@FjWPyUXr3Q!H!yf{*g2eO@KLaB2RG;bO?zkIuHetNMQ#wayFvi5%CvUHL<$z+0ia~7tiRdn#hwVO37lF}7qxnoehK**+i zr;22HLFJ2D)7&*0m8|hx!dN-Jmns`q3r;I)DU3MjvnhqTQfUEXW{laki9@uOHB*`q z9K@POvOmef;qICe-}og{5G*@~j?(H;r5XLcuX_6ybtpTiFgXj)?X;VaE(m}_+IMPv0~(5(pC999T8=zapm-ka_(m4DFzqzY z^x2YC9W$@&ql%zr^_v@%dvAV<2XZHw8!EYOez-eU-U7-Vs&xh{{ug7?RbIL7U$5zJ zZ`6E~9ct$q0~ayDWe&GpH2CneauafGa)WXa9F6&4aLTAZZ3;ljVC-ear=S7&05);q zD{%ntf>p==w7cr|IhN|GwdbK31yq{@2!p=d6gJ0}CMPX=E;yu+wD5*HMe%R-Exo)4 zrouO_^n226+XMI^PTfAad0Y6n|JaQKg7&S5j+Vhp-S8V#>qfw_pONM?Q+Yc=KLGgZ zzCJ4A5olTCGa@t^mY#+8w{HR0DhEG|H>X$+dT`(B> zk@b;0)qj)GC~Rp>7asmh3O@N+l8H7Vf#z>VO#sS`jz)>eB!Sn)4g_~yPF(fAosW6V z7Y=xw!{bqdVdzp=E!$xSsryf{e!%NYglehvV4X=pwH|j~VV=VOelq-uQWhI|THz6( zyjo`)@yf7A3mjywqsCw=e|1Z%WrwHjvzZtVgfX?XP082xibxV|dZq zC+jggpOu%R|2_mx`9=m{4m?l2{!D1-{e1V*_cV>W3`UtL+eO&7Kr#fBa^pDY!7n9o z01P0WwciG(78pdmX=`Pctsl;eXEUFR{O6MVUv9G8l+xy}Z@)c~O%q9*-MQG1F7|I2 z^*^;GxKIhsg z@0SDrA)N{s;rG9S{ZI4uU%~#~3jFVi{VgWre^>19nb!ZB;{Rlr{-6N<*T?>z&;0+- z$38LKUXK>dgdv)UBEurkgMO0blMliB`rcvfEB9A*5<#%kDvp0E8Ig(eQ#$4WDG0jok;fJcP}Ix;+Q4vg(|&1FTwTkdL#Sa8xM@Z43Mj?} zLxChh!Sx=pjinD_B4lZ~7~kLoK@ zo7VP5UYesEIRJ7E^-GK1jF|&`t^SOl^RyKO7!B#bt88Yl%q@-@yyRsBqjOu(FN0gb zG-UyYc7mO-o&*{!p_5lHb(f}~r3ODn&8S?WbIk*`egELUK& z>&>XHF8!b#a|^lMcGFR#o#C`>`Z>VG_4v@EwOQW94@*!gcd{c!ep-zVVZCS>9ddFy z3k{gOG}Xxn1mGnrgm$7tfLcfgnamf#Z_@BR#4x&k?YdW(|DItNJuMrQu@ua$qn%omB7Z0IQck$j>d`c0MtCv{z!WPb~t}^kk(e&(eO@V z_l@L!Nu6Q#?WYudq)9@x)|fef))<%JId9|o$eH%ZaO8*`UYIS%B_Um$xQ?DRRcCiq zZ&G`GtynMkv*J$I>T>{DT})nPqUf;jH&WI>K4Yvjn@EZ`A?&NjDA3(rcDJ{ zb8a&O6HdMpX_YHlhCUV|r8%v&H>>f()`8Z!iJIZa`+v}V!?l{Je9Ly$)aA>39{@hA z7*FO`5g6}w&pz_?I%BFEGbe7A0Z{Si2g;n8Krx(b?lrz^4*xrF+ryoO65UcuiiiJt zH=&Wj6QCr$@fPuk3=CHnDfMMN_U;wDDLpmgLfRV{jG}#d4ND4*(XnOd%zHYTx46q< zD~rchq605-vRnXUBq$x4$O9*L=mfA<8w@Bus1s@LIc7WMJ)Ra?EX4lK$_L;+$XX!|pdD&;$UNGY3hnJPy3j)OYDlA1=SpB1~$D2hK3FJG=03U?rO7 zf^{yg(zTy}CmHDO5}<9kzec>i*9$NQ)3;MYkS>uk4$t#l2{!8EZ`7T%W^A(Uc898otrTV z3MgEn<+b|4aRkr@Ext?_U0_9a7EDA6=kMo?t0#|itdKa?o3P#$U`r?qYw3fYWo7q+8=bscEC_KuCdrsHL#7RQm1{re^HaC3(ifUhj z^#tVCUA%9=5i+s$<;U?ULE9}on+J-bJSIPL(<*DrFma)wJ4(sCaxDHr%msVWAcGfha1I)&G3c_Ss2+2BXwokKKB>*Dqc6HjA zNcoMDc*n;vm1c=yZ9EwVp#-}0b_xm}Ccj1jP@q}G1AsN2P;-X_Bg#}_z~Ipl2LJ|- z*0Xh)Iz#Vh7yINv^3`D=>_9QEpp zcYLXSV*c=jT|aKxx{F=HrRkEuUHy90;N0jzM|(-|WlYCluInO%Z;h#&SoVEw9+jud9q9wdWuhzRW{_D(MVfgO0OQfk6dXeni5 zrqJNsxW$%A(SQ=}p>KM2m(?7h_keG0dt9~bMPXvhYR9$9NY9NrRp1N2;ZUMol#X_I z>bXz=LgsXeVIcEzkmCP3<)Lj=9rUN@O4st943sxdPgV7dfx>W+mjPC_kH`$+TcDv# z=6H&)riOb8`A%Agb-HAt2^%?Yq091c#(gk#{2SW-E#H)URKPbyC^ zAT5L5KM}A#S*6jU{q8u11iDFGGn!JUIcs1@QkfMeniZZ38?aL}9oH~S!@TRJJv|;* zT!JmROol)Y8X;m7LrygkcgwA%Z+AkRyDSuCax{Se8ywEoQ_?0;{IVs00m9Ggki>(|`5nGQCB#Dt%FoI3 
zs%WEzc-Y0#cNyR$0OsvZsLTl9dUN7cqCIBZ4e0p=vVDJ_^PMnla@PPAu_!N21^u5g zC>U3m^5SLi3oKeyDgI>YQF?9jQ$vpcdKf$y5{%2e7ja1C*7yA5#^v{}_Z~w->UFTm z9q~JuXX$7!4ZHH)s5wee@}x+s4yE-=r!~87#LZObsh@g0F*J!>LQ>an-X;t6;RtPW z)RvmM$~P>A(pri@6Gk~#1nqE5LxL3xh8G>Fe^oV#xqW@MfcjaauNpE2bZ68a*WRRB zrMpP2I-gJXUb5yw8GesS(Ecd7A_X}3W^fkSc^ zx(rWb$B|{?N^z6EeQq<4l_ZcAxyZtzF?zU}iFY3?DM;fXWNkj_V2V5wzgGr{2!mOc zzEH~69Sj6A-b>oMq8v%c?!BkDHI8!>O>T(>nA(CqhjvE$_Ra38j`3-v01;^ps{z*h z5mzmi{w+SP9~R6qwO+5mO2YBqj7Bi)FA2>b0QRE7{dKP>EqEm;sBTwX@Spz+;0%b? zc>jPY{DJez!#YZRispQ7rWCItVdzAEb_7f6H@tP(Mq9ZvSnJE2|M_q{fB^DBKo1a* z#FabHQ1TFXMUE^a$4Z~R`%3A`Uibls4Td^^5L4c26a+MFnvS5CIRMRn{XkrqxTH0& z@{EDQ<|C!-BP3$ZmZoaNxIZH2^aC2Y2V7MX`i_fjwv7TNeI`)chXIh{p$HNV)q0?OG+ z<6^NJ8fCs3JvF<6%5@#`Bf!*b03n?cEYdblA3rp$1ebwnbE-Lk{!`457X}ET5Lj!E zKF|^gpb)B8Q7{@8ZQ&bk$Qof=^5zdV(PTQs0kqt(CC>$BS;0sh?Y7GjrxY*5-ZaO? zm;6_=W>xzj$dFZB?iXF#_NcZu++{wz4x`<_wAyYo;*>_LEHrL%ZO=x$S~(#nA4Svs zfoPh^AG{V52%og+wwZV_xKo_|hM4ZE!YjAjGtqJ*dOUI&3d><6d?akPf*!y@@g=qM z44tlkCw!bh3}az`p-qq;4=XuPhA3DJ0V27lCc{=X1Xvz39XCKoLgNEmBfUYkj;OCg zV=K7B187~N)iUtV&@6?uQs|^yP&}Mcgbk!7s|d#heokJ}1?Zab?o-nnKx^S(X71VR zMJ-*MTb)$_0`B(+HTnRVXw#) zd(cZ!Eq>1qG&=KL?jw`o^Xn6RCA^?IV#5TG)wcx+#HJ+(9xBxAt7Kgko>#P9V zOMJm(3q)4*#l?5#JBE5Dtld&MN?@4$>_&IMK)alE()l{T0ZL4o^Q?&A;vBoo?(U|z6q(G6 ze`$;11N9iy@cbUA(n9@#>&+L8syA2l>0t%0V|1tD6U2fTk|_vOoKp zN;qYy&R;C@VQ)RbydfK)xnpicl?m(u&bGLOY|tRhcv~bJFsmPg*2y3ahh+ zZ~mLtx^Zu#kD^E1{Ou!9R0=^1%Cb}bvl4}>TZuUL~2*V%z|NM3XAQZd;y5W5djK$awHg3Sej-QZ^6OQ8NpQ)di*Q z4&5u6RMI4T#CpaXN4P)GKsnHzW|g#b@;sem3kR2=6VwOG0ZDg~Eu#W=2aT7eNq60~ zn^i>9hpe?P=^Cv?7yO(ijMQ~R4nLQsPhc_#rT+@Fk4 z+#_IDHTcf%MEn^iAA{K3;rVAL8zIfx`%9w-ZpQu2t0Gp4&DLR}V0Nc*a+?JL+f>W> z>h(qjkVdNrK&kFnvASU426GK@_1ig&M`9^Y@M9cHFS&s%_osKC;k{|nHG2zSdpgT> zPddN)c5XTNFMaoXJ1QRNud{0yK%U~>3$nN8y6~#Jy9X>~{5L^V^B<~yeLbiH9dleQ zp4>-9bxN!e@`--WK$F^U6~?aL&ljYwB&Y;U?3Eq74h`Ca79M?30nOj!;ir#aGb;A* zJ}u{QBXD@JHOL3aZ4eu7=qtd&k78wDot@RN+@*N1G+!`s%do8~@E@c!>%o`s)_MRi;3t(O+;uAV8>Xh zkF+oq=xkhhx2B^cjqE<@G1$|)(>WGmTmFIob(d)LrEl*GAhpF;{t!I99};>Ob4@%= z_sQsv*+v+Tk#;Bu$$>t1A^lxA_t=_)##&|hM==cn5Bh>y`leE+v1BDk@!_I5f!22O z>RotlBJI7IR?-j(mS{)vnt<(j%NGtY(w)Uz=}LTC;BlpTVII+NJ%Jx_3l(hp$x2(B z;Er2kaxP}N3z0*eUGoSZUgwKdx7_#B2Y33 za~cT!(`&1_W0vtDDH4aLkE_NT26!>lk}YrL*e&UV7$^urg`Z_zm@-HVfppYcQ}i6@ zjD2|gYXhACQw-6J*^=s2mq`>AH)?|%K5r2fJgkoQ`jV0Vx_r?=0FK>xg=J@MD@ilu zOv;GEwg|Dw$o<0cL&l8kvEtyiJ2oX&m8KfT=i1^62!BQ-X30fk!9>~ycdq!le)Xe* zXb_gSz;1FAx9C6=kpCBXWr98~_VQ0hFljJXqWaJF&egy)3t{_vZFiqh_1b_eH zveh;(Uzkk$45bCnL45u5fTCL(`jgv|R<|0nq0I5n6ydAc7j2H4E8XTT~&r@1DiwE6O2YnD`6tZPZrTSlty?H#; z@%#5%DJ7(mwWL&(B|F(g$TDQ#vJBY`S%z$*vSm;9UG{zKV;e+L z-{1GX&-vq=`}|dp@)*27pX+*G>+=<@s-Z9yzh4blUGhk%oKu0P(b*kd4Y3cvtUwsb zWK$VZny|fI^}YH`2dBCo0et=10So{ND5OJR%lWXc!c9yGH%ZP)iq{ubwc7bpt2Qf# zgw~XXCcd-;4fa*D=T33p5&8W;cQBLw3QpX1OVyxFA09L_bjN^Q=lQyeZB8L^{oDJ^ z0d`g&;jqFE7hK`aH9JHf#=ySwew131J*oi+YCU?sj~=jnJ9d1a`FNtGvS@?~Iw1qX z_OE2?K9pPt(5oL9^KVx;mN&m?iT-m!AeIo3t+4nL6?SE_Tu`dRxxUz2pKh&~_w*62 zd;=--Gc~obOoAhK_(64E%}M@78w!Ia#S^5MUEmO!)*L5H>1RMZZe!d1j>Y_*fmwK(<4an3Q}*Q*AYqj3xV%)*3C{eY51f z=^C!5q-idx0zbUR$9Mp}{_Yxw?-Np@yRe$cK#vVb|*mfs2t6A-B>7>d0fUcHzf z5@uqgQZ$pSe$IO+da`ZNti4jk%ed>983}t}1PNfzwef_AZNSR}ONNSm7xhF`mk3JU zvtv8iVOvv+JGMI{Os3a-Pq)wga}+rH7yAUd#5uvYWs%ZfDYvvDbDbnIWf0=Kj&H~BWnAVs;Yj+firGn& ze#0bnrkHO=GnXImrRJJnT3%o_D+tZq>NWGop62JkmhQRQ{j9sBxg5gtYiM}&`EkqN z9$^C4h8g^qYQM=+IjWkjI@qxOUpx@t;VFt>?U<*itbw{Rb9Nv3CF5`m(7WVy@PK5kaTqoZFI{p>)#Mkx_ee$ z5fO~iqozsc1c|cszL#>;_Mq-y*PO@{P?V`cbTG;U9Gc@18pKqzC|HSGOeigl>3E$oO!obe ztY7F#yn=`b;wNYmkRLy+?hS zT_j|}f{#jF=~Xjg$s{Er_qwYH5*!p3o8yjj`8-P2Y_H>Xa@8tfJY8uEV&k+@vG>`t 
zio?oQS8h@-a*E{^1uSeToBd2suZ0XKfXS{$4aIgkOSzgXRL^GZj5ZG?;N(UNa}R|U zDzCon@pd=<%wRXfqD)#E)LnLi_EcAc>j($D0w(A7+xP_$cl7y}Nh{$?ooVk#Dsh4B*NpeRB|^4sgg?N;e_LtmxluDqty zpR0Fmw0NOiue@@ZVh^=+vQ3tk7D6`)d}hMC?kivSbeAK+cpfey;K@%v9IQ+cei->q z^$FggOA@lN%v^2fd3j*ymu&nd+m+`@m?MIN0uvQ7y?vGA z_N9ia%A1b*hk@Sk!#xiSVW^sANbYzt*O>FBE@l;+EbF1Lumy~!jk4X)k&HQ#6c}m# zE*7h1+mT)5x+im4ZTwAWnnf{GD@oJv-RLvP8n2lTq0Fxi&pA>G@AszS^IQd!d6qqJ z`8H!d6I#faQv3ASC{cvfYCi6#x)&-QqSI7}Tf_yo=AMJB>)&qvOqZ0jFu%%v-J3AX zI_Ndp0gmf;ZMN?IxT&*5dkBUq(|LfL9MC4fTC1(t@uAiB7Nn~CAt%Y^j<09Pjr#oNTwarQKvVn7Nf*yM2yCu(R)^gR_!3>xtXFM3Eas?z z=uBmPQpTrh;XffblOkKsADS|s8|qmO@jMK}*935~fQD3(g{o2{>D3Rr#S0-m=?U>vuxUF(RQg z5B6q$R7XPhnBzG%PrXE`PP?cF4-RK%!3)jX!}p z44fgz8WLdsZr@H`zjuxShT*2wpn-`=-7mVyY`5DI<>gfZ(d!-gH{dd{=htZ!B6Z z>a(`PzC({=9ohAU4+_-!az!c=F7rX8y?gfjjL(Iepb3%lqEFpHHL5+nJVGGdo^QZG<;hSsiq*5`r&=9nYRY3FEZWli8W!I8x z=&zJ;`l6oLZE7Ayl}JnnWOYx-qt|_F^(KkzmT?T@2>hqf8LfNrZ>@T?QvCFC8>zCR z(kSnpvbEC+^cn}3*OLu{;}`cJC$TTJujd&qcvvNV(q?%`wk&Jz0qVEc;0`>;6b#8C z0C)D2S?H30{T9zX(m)cGl{mj+FHH~qb}g>C`FNx`leW~6HtZ?mw#EA~+W6Y`$)ISx z9YY)R2wrkmWSWn`qmo_L_fIn(KGcsK#S;MDPd}bGpq{2z=ZPZI1`YnKcv->Jz z+_^y3g5TM5ZKqh82|=Xg+M3$1B4es{3yYbut-veke8QlvrP5O_iy87weUI!~c3ggh zVAjs$PWnb3Hh!Xnkbk0Lp~*>ARPte+rY)^%SjLr_S*sT+{G+kKTs;CGC??V|wee$2e|XQG~0dOvD!Qw8r<9 zo7qe`A=7&$+-ECW+|102EG;~%dc68asu4NXb6P5O)P}qoBDLxP<7~O=ZMN9mTqgC@ zT@u3XpyblDIoBcl`~BHdN1&Xu7!N9AFd+lCIjb=*KJZ#AU(DY8v|c^BxGDd&fjyUe z!&7yr%zcU&zojmpXL6e)$Mi$~MQ%JG{j(D1tdNyLz=IigapO1r=O`KNz8^^eMPE(d z!FUOax%+bVMZ;Xqs~tCfE3d;$P{%adZRNXL;~;#y& zx^1jxmEj!RTOTxca
  • H1Y&G%>z1}*s!M?Q4VlDA)f%5BG$;!LOD(iO| zj1QbtmT_;+9V*N+s2nkS?7|S#S^GF*zdj*rcbnd=6qd*w^sX7?!&W_~CB@FQ!aagx zTtpCf5AWvpQlMJJ?7iC%3iGV?S@FI4uD!2)7|GpZheS=eBzJL>AAHZM%({O6H7Q3h|_nSLs0P2 za9LUpN-gHoPZQ3brWxkbuj8Tntz?q8R}mncWse%s@3l5&9bt5&6PFrrUs2@jF~;-L zYTr(26hj!zo1T>1YUP<@vmzEGG*%7H#~J;!14^f5?dZFxq(rRT2v0vZ<(JqIu4bfn zMoB@Y>*$_af9Do3<}9oYka=-Ib4J{V6x}@DqgAb<&^zx45MA)sQ7AvO{HnDHU(&J2 z$WS%j?YD83giKV4ud$|-;3GF51(DS3$fn+%%luO~;2LXLWhQ4GBfmCYE?vCE!6R#R?P^2X&)O2BHgtmPX*wYDam-(r(UR@cbhVw*Ffui9i|!=SD8#A zkGGB$HGL(-;bP11M#xG|e&3LE!Z=xTcJ-3o-SM^M+QVrtYzcaf*h*{AoCqzM)LMK! zx^I>$DnT&{^hKVfePh7IY1FA(;$xh4I-2cDv=|wsr0GfrRuE(n)$3>3(#2uB{e@!X zFAxwkF|(7LP6|8NZ0UPePtks3+GunMXrB0_(gEvr;8FubCvu6V=rANr-EAs?>i(-k z*f3XmLm{0HJwE01U>zv;z4|+RXr~o4B4~W$t{vPumVeHEHn*_<2C&LmeR}j9RAn<4 z6EJmXXjZhRbQ`jL^OOsAszY`Iw7f7^B9vc^nO7TQT^B{__S(VFvJ3WWvR8aut(;l90c2cv(o>D(5*Zk+hnebJ70pmxL|I3>(}bYr8uJ`xLI$uw?kH4 z$%CviFkY)ktgX}+JwoPD)9IpM1m9bmI*>lQ4s;>|Xp%MIbgG-j zK1?0B=$^H|o2Q}v76is@{uwbTHsD6hX4+}vX~%cXuNIP+oqec&qIFK=^PAK61AZhF zGUSz9Gobt!w&FV^Fn9JXHNJYT@r-q$(%o@MJ3t4rYk(D)!yYu9nQhZAltWTCq{a|b zg5ICb#Hf}E8j((~(w~D(r;dM>;`eGcdq&|SIyG5~$&K%-x+)3yb@ql;&Iyh73-$5x zS5)_LTa8IQ`mPKxsSQ0j`d8`@-_s2FSxv{MT3X`^OP_p1N+pf7HKA!midfv}icxR3 zPB`7czagNE^cKCFB$#iUC*;?6PMiq4x){|1DI>ye59!k&plA0OK^jfkKI~B`CHQ8o zKlHB|<-D(?hgrYCk?nQ?pY*~tsh{7JdAEz61(Ng|Pu|APf;Ou0NdXwG; zG<$H9b|IcMt>=P8XoepcxR?kWBv3Wq+hyT`C>(IfY&RT||%#MNk{ zXc)L}-q7`8fF^C5&<5I|f^==Am_Z~oct$EbOqVHGd{{sYtpVj zqQ*NUmpaV~KaRKn&>e--ReP=g^b@u90UA3_CTG0ZLQbNs+kW-3cc>2rvDePSa<6JHiLb)OC^<0^?XcB^PEwKyW_WU6Y_CXoiKDqV=5RVkq1gD z-}%6Tm+OCP_+DRp&1g)0;UJ8!iSo8#B3SR0_o>o$!H!b^p;!~q*|aH@4=clbfWixS zr`t&TkQY!k3m|MydZTNI*hpuNIgk;>X!2%he+rjdYckSI&7kTZ##Pje)vUy|Eb6Rh z4(K(%w=vga=5SpN_`F^TGJGss)sJ-^-a)L_~8 z6(rz>d#c0jdX8#4CUCTexzJFFN(WP2)=ivF|t|nl|bh)ey_arah$9o5oRudL~dy8S@{DOch|O|xDe0}&vRJ15$Rvq^2=!O+G+?-ZMvG?lY<(f!N-DRG?w!+BKXoukx`jlLs!8ioZu z!zFZBAYq0)W{%9YL7l22n3kqSxYCh^zQzENLkM0C;%`dL6|GfFU7RyoYJ{YgQfU?Y zKA`TZS;yxgEEuLoNw~(C&NC#CaRu6;nyO_|dy9 zDB0!TxUaRE>6-tSHqi?0DDCwR9fZn=RvvK1roDc<@Y*YTOtju7%V9ym6HbDh-=HL? z@fLNd#v`w!Bi@VQ&I44_kAX9mU%^e&e22#V=G08Kor!~8pDah=eK}-gMu!|LM<0`u z!3sVv^gmQx`&>tLJt=hZDEL90cGA$LpM)QsV+wACGuqxY9(xlI zAmO%$MRhUvmu`q5F!%Q#1U$ARj1s_B=+-5i#d1)4fh|dug9sQ4TVXnafS&SHZ_xxt zcS<;An;%u|zj2UyVyskm9hTf})qO%^8~ zaD_LW*={*^L`c7PG{LRrHRzN@EO{D!+HRyME@!Z;@d2mE4dTB z&9M@S+<(k@QOG>M4nDFtc?QFFO%zH z@4+~S0UAe@8)PC-;Fn37BGD1y$&C6%7)5^Mm=)T5&&26veCG-G-yP~pL@L3CP|A&M znEqs;ko0OCwZZn#Y{nlQ{y5T@H1kx`$~^Pv#>FXN5%>?B*}1KtS}2^=Cp3&Xo3h#e zJ~U}CJ1$@_k0C&t#7t5mSweaBVA@sU-eQ2$zDa%%_B_z78l5YltZ>noxyxg8s_HiQ zx2;UIMhyq!5aMp(ns8HMQX?}qC){IOC}+8xxTG$Obk~>Uc#xLW$`q37qt^iRWflbk z+Nl2aS%Wj^#tJtg7;?yzN9CgTjp=Jg4D8w)gN=8iIe;nQ$cYyX)+<$t$mKJLL7taX z<81}M5KtJIQ%qV{7;`@7$qyWrWNfSvP@*J{&bfYJQ8ZPV6~spRBjVwB9mMTUq3=DTT$ z?>f+MruL%J1oY&s`>zm7t9tGlH6UdEkpcbcgJ;i$%on02=@K`+m&A@;1U&HJxWMVw z3~$Hv#~dWC+>>Z!$UPU$Jk|20crua&xDMe3Kd7ygW}IuX9HX(GI3f%L>JxmJNiQ7~ zr(0M_&6u1gJbAQn8RUaznG4_ri&!{08X(5_0rv~owvX2dum?@$Xw?kx^Wc3}%(2Iq zMWiVX-nWS@hGNfw_lbkra~r7x63*IVMadcN0yBotL0C=MO8!AEH(iyeL*h2e*G%m^ z!2`1uB?u`$(XQiTF*m)aHFZCspB?6cHw&Bf5R|qv7%^?nh}htn>db9%BuDz;yp_Ih zdFamggd_+ZLS+XSxJo?G-8R~%Lw2qLv{@=Y?IW*>J-St(7j>r)s4cF)iMUgxxCz0^ zq!|tg3{^2LrI+m1FSsmE!*pp#YkKXBvJ-)(EG>?;LkL4P9#41!sonE-24Xa@|+^koHQ!A`%nXb!*U;ABix^J%8rc;MGiYxic?kjj;yp(k! 
z3TBQqn&Y+i&gwpRi>(FF=BgjCxPBu$gfN4Y)RTjWU#ch+S*yS(Mm&C@p%5~t-rH>% zwZQz(oQGN8iOZN^i-Mu1)UKj#;6k?w^M3TBAMA7BR_S1t;$Qb1F!a0bd?uPlTrUj- zvqEf!wh*s(Dh^AuWjR$m(=8`}W|~{Ig&*vou_7z|qumb8cb)6R5o#|Ox_7oXxO)um z5T~1T<5A;h$w@l7dSQt&7uby;ji)KIOTC7fg6AaO{b09b`|D>-5iGZWu&lUv-0f89J ziNKQv7+O8GB;iJ1*C`)3W9J*R>s}mqAy5rou3cfaeFF|1{4+A2lhaEnH1^2LzbT@U zFZ=D}LR%)8a>?C%n%SPz;{x-r0(TQ(0Dr-$ zP5O}rHpd)@yC@Z7XaZnq^XIe>%}3xm39)+>6O~AwGJ8S#O2-cNe&L4ltejrf{Y+p_ zJry7bwakRwBlX;dM-Bl5)3|2gV61tr1A`TeLYb<<%EEr|b2MCPkES_{E~%Z=ieGud z1cmj<-95Mez_2rJE0rVw-wTnK!)-co^aDehxX8xUnXK#cr8av10c|oW32M>}>IaW23rp5-z84B&6O+GuU0RzJu_Gr@e z3gcp|$r=E%9Wq%n3u{4}05@&yxdAbE_XyZ;3gB`p-XSE2HH=iPJ8rD&A&dox!TuDB zur}(3e6^YBzXs>LhoB-qWa9sR(f-tqtul@qtykd(qF&A>Py__;dLeFS#f~c~*+kHn z^4-FpCF%ED(SnJQ^OXC7ROSmqIdmikT`wqKYOcdhP^iH9qb2`{F%UZsT6kr1Wr?=7 z1UsB!=@WeNKiG8BA_p2Xxp8fg4Gl?4h1i2bsAv3c1q3ItLEIl!(FPWg$)~+Bs%|}X zlmbOh)GhZGss$6eev?|jR06%uL$3)^6ebCMO=rh;op9L`4qAlt*4VCiKL{+LUv6g9 z3aSg=?)D2}sjolBb6A2h#y-cQ<4C1t%ny@qJR&@r2Udd~dZwNoq*m~#0tKy$o2>Uf z>Pri(Sb4BlnMWfm{MJ#dDW?1vZRXL=tyJ|k+6vs3oCY@CX{$pvsg0H!&tlMc#q)m0 zaMOdQup)M1GPr#nXfrtb_u6gA-se*KV}j-MH%%9&-R-;3 zv2-uuA#!$L^q{C$Q_9X^P^vrTPYnbcD=fgUsY^9;Y?5>Sz z(J$+oWB$H3GPm%J_8gQd3Kg9qCozZC`Wg?`kvVB?#xgXUW#||NC(7OudFqR}ONLU3 z1~ObdhN2p|C|)r+X6%tGw^LMl;~@t#%`tp6ZZqZgyIuBozGQN#uz`;g;Co@QuGr(S%xe1Ix9++Rb++Ad`5Oz%Ska)$>&tfCk-OX|# zV;DO1#*mYzw($m&GGZ|0$FVa;iS;W;MdN7t@##E&oE+)|3ZAsUO~xZ#DmAqbo$0{f zmxEcZl5aUV_-~bKfLv%i*@Dlf;RwX(Piyo6+%UeZVEjN}P4l$#sPD8^4>1E)Ij z9DK_a?^hLhmMi|}iY!s`f_7L=#8970tW375R}(Ya^59P8nN;H3_@O zRI9nt+UlvxP>rgvFCj=eNe!(4*1@e#Yw;M<9|8Iem z>z^Ay4bBV0tq;uBrDHA5Wktf}P9>e9%Rn*rLzl|7!%Klw_nQ?HI3vxwZdDpC_P?1XlB#!(Jk_Vlc&crVPPLmtSo`(`5yfSk85TkEG+) zu$5m@+uKBHj;H6o5?b;%_cC<#!7{_ls0%Qxc4Sa%K1)h=y0*$bzH5VW%+q@+x)uG& zg6V#;{Sd#I-zDBO188gOd7jkS3(K561Km`6NT~J9K%C||If}On=b`<{(w#G2Jxm$8^v@LDBUVWBhyNVL4<>MU3Xe z8}w8c;eBx22x@K*`q@_(s}QnUR-<>s>+eWjkjl(2De>F?i*dH=+5TupVEa$OGR5;kJM_((~X0g9i8+@r%7zRLMr@vSq z_l401wEh&@Bmk|B&N_~g04zzMAF1(t&S&6FPpdlx_E>!$t(|jJe*6k)*^zoF@PaKy z=L~Ni7dKqX%@5g0I>rLmUOR)*uH4Edu#~$IU^DLicJG$V93ol6_!iK_okQ5@z{tDF zF0NHh`>FB6Xae+t3|KgD1;OL$@4e}(@D|#02#p~nQSca2im{gb`mK8Whf|B4vk=L0 z_k}hsA>O&}0hV&Lp}mbW2Lnlf{50;N|C3DSRAWf(T|wBl*t36qY>>_GF6vlbaMCuG ztIIfXyU0;%Jc6^>MOLVbTj*nk6(iM6IUn_~s!%m;^EKjieL)Z_)R1&n=RR2)l|bygb;5Xb z4yYLew6S`h>C+mIwK;yNL5>UaR%xFBvlNV5aICp!KK>b*8gl!J0MLeD;{L4R@fZ8O z9w)TcB$ZcFrnV}2NIe2>mOFDQp*7upiM{(C@}OEmW=IGr+-s-AEV))_4m>Y#803k= z_DDHsN<$bPFjIvCn$$qAEH;p9qdb-q5i@D6T;w%VrFF zK$l$ex*?w~`GBtBc{+BF4jV&#&;-QI|0>d>XXT!_R=S6=3ak~)V^zi!bKqv6?_7b+ z=`3;Jk(~w(V>+JoA?V^aHClvM?dDu$m@Jl)!_(XYM zLg``AM%~HN0~Ik8svLh}*wKUwumkq>3g!eV4FSt{PIkI@iQk)uIt}t8GM|0b7)idZ z7w1r|tuxs+J`!l+96fS*#Y#89eX`!ZdCEOy z<`4_N%_pnIM^Q$)yYJ1Wv)83UJjP#6OTmcbp@fjDT~>isEG$B=^URI1F3VCX9ZUJF zfN6?<`?{K=3x0VFC*uXV+J0{*-&`~Dw?QrCK!&ub1*LP3@XDY~_#_=@_9)n}4sK5P zP|w;R@Ez`&>8u*9G2uMS`s*J)72dG+;gn3!oTh21DJV}WCb!zNl+>kNJ9^A>^X}IZ zj2BVwVZi19i;hmyML5`KN^bX&}uKT>vx;+=|FZYy0GgyMtCuuH9S;B6TlY^g@6ty0J)Q= zIy&PehlS$s3W=I}lVhU1Sx$po_W(_3Q0sHEgPyOkGe51;XiAgx@nm)0xpLVlW~;P^-cF4D zOpBNCL3~E~O^l8o`$;HLIDK(-giC4lX3@mH(tU$J zulv7C_rNtuB^7&~LxPS6XS3FiTD`i0c!z*H`M5x_`5A3Q;i$Kv{I(IGy-*3v;G9TS zZUjI}aj5Q{+=y|Ee7H7^=@vGzO;i)&N~OS~IilbRo#Zh1I6Y-;LzL zi)Km_zWylv=X?rJPZVC{P{>8yyQBE?djUlbyx@(s^kW7Y6g(#EF~bw{7=6E={+Yw@ zrDj~-NI8JXPVwp+Uy?kWUeaIP_e^uy5b+|c5qZj+(2>5y`em61-JJb(2W>n&a&$_V z^nC;A+B<&~R1FYT)ViA1^fv;F7aq;cT=sGOWIx3URf(?<7&ncsb;j_KsC*~s)J7G* zYr4ZmGyY4|qA4NMFE=c-V7FcCm|}y^VK9AWv(S$Jg|p0rVJT>DNoX+?6xNvC zd>N>N79UP=``BbYOd7s>Q>WZ6+KD#+rwM&rAWNO5h?#rpST_>e|J%I8q@QZQ zC5H=uDlbR4cRmBX@ZpNJ(m3SIeh8r_9bp=J=2$xM-QZ(TdWT=&FP__JKRjJ5nnJf3 
zL^*a%=6~eggMo|=)fk4LO!(`2OcLnfXowWP}oX3m|R{}pjHoZ~h_nCpz^TGbS4poNSA*b0P2Okfm6@6ZY0&sK%`OMr4^gTG4+MfA-T&hGm~TX9htrb2Gqa+RDqPMB z{5;w|*u-@T=qhC@l39X;Dii(hEp)h`X=R_m5D@yeo+?#l7LOdKSoc=Hk6n#ur%Z1b z8Us?#6Aq>w%jl{wjnky;R5SWc)E5WrZT@2>e)>?ZWk50#C2sg`Lrdh#uavFEd2Lrp z4_1Nq6mySFK78S}q|c`t{@Yvs#&;-`0(^erE=5Vg+k)|MV(!9}IcW1(iHC*XFCa{7 zNmgzZQ%a7%qYNfP2sMcTkdM|!oe$&3EAhqT%_nW4UAK47xjqRF<~$$GK*=F* z>uH&!@gMB{*adsfE7P>v8PX$`g_Buic%STNeiEmG4h)yettQAFx6;uYpl(o6%Wa$W zA8+M+!*?y^&Y7v7L(^78%J-7>e9Xy5?F+55n)Jk{Uh=j{cXhp2-mBJQ+~E{RS+Su1 zwl=VP7dbv8Kh8VNfv?1fe1%3n!Jn7$!L6&`M60MBf; zj(qtOoPkoo4bdHc7M6cuUZ2e->S?d~0_4va zqORR0Jv+ig7aZNXSx|rbSa>E;Xz$eytu0G>(Q>JQL#vg{|1!i|<>j$8oLNFB*VHEC z$Q%h7^H)qYyy68x?- z=fO$WSE}ga@4v+!$cM{#qT6+|Wh$2M)SJW|(!UON%I-_{N}KZu;I0x}@r?CQMLX?D zG`6-jYt5iy$FLu?)ISDlRnd#xIeI3KxjvPCoA}FcV%oDzLc)L?xZ`j`leLSsh}(zmA@woI zMbPi4PhUaB+yOJ@%~4=`fsQWe3HzH*hGe+SJS((6cyq1~D&HDh%6|CXCfe8^T(PIl zq`MJC4NiR4TXTdg^&UwRH=n+gr-~V0PnUF`B=qgc{M|s4UD%Wm&uZC_8 zOe14OzRPt|(yh++a3w(08b9U?sGY=`1vwah5V-QPd@er(-|MnB!2-PMibWf>dNIDK zOJ?+M(`QI%1|?jlcr{+KxK<`a(XW8%1Jy&bwJn$Bm`6KW6(5vt?Yuq@J$^SLQ5@mM zjgIHNim|bYybg&ui&A*_NBn9?&t)l;epq@A_Y*|Ox1G*b7y*=dmf@eM5X!7=xnsDJPV3toFHu@NNyu~S9zRdm&S$a!0%+=)s4 zY4;J8BiRb2yvWbb?wPn%$-%gnTDrcAes?8kX?i^Xk-QhbWSI?1!*A%x2LG`|A;!d` zTmzCKR_-k>m_80TXE11 zx!`o78+oUhW+NLJed1EWk-o}kP!6d3*_bFVBo0fE7xmNTKi9h#A zMz>jdE9d)9Xnh=S|5g-S^9%Hpd*!G!qwDI1SA58e_Ou*4f3%H{SSfEz63_CU&{O^- zkN4lWJu4?!+^}4R+~{7c|LOiwHG5UK-F2}-ba_8q=#W!mwO~mHd9dVa?1w%R>6QDK z>uWm!APsiiXM!s3zO+bJMq3x2Qa7(O70*L-Wm|6ZT&4KpdNub6nN$KjCD|3@L3bGM z*l}<|GB}W7k|mULHV%-BT#B80{9?v8bcSwqN{ORno}k{LKTa5Y;{eD$JAzuMu5>ydg?V8y(b7u=Hh3J_}4T1h$>00P4hhMhrKLXsZnDPWv&Lxy_L=_l&-8 zjNh)wrs$Cspat#LN#CqK$$^)~X<9aWtP^H4lKCu@bQS?}LKl&xKj^&ghgx*qPPhZqfL%2p=4mIz;Rz~&{UJa{4_k`F0B-j z?RaxhqBI?vv#S9A@at?gVVB%YJL{JCE;PsMdih&j>EXlGf$Q|lg*}GfMLJzK z<@l1_Gp-9h{&abojqAfL@5o!--kB!DI^%;EMCNg0LXXJc9RhFL20)wcOYd-K0lDfg z+W)H?qGn)UWD~@QTGHk0i~SQ140qlrtdSo7{v--8N*Vy!q9Q-X z6ZO~{=ue@&rNaF;OWX1_`X4W%WNOl+eYdB_m`*tB2LK$EiJ*w^H(sRkjOWz{YLifP zd>3!oyOS9fc*L9B^)$5iJ=g z*AczO$1VpQNy08%BkTI)o5Cdk3cc?>YH>V_t$8s~6`ia;e3rcCQa#@2HysbDd>I!9 z1lsgho&P7ucBIxcAufAqf)oL5ulvQwC~?WDnSng^%bC z@4F; zSBg{k6YF+ITrEJn;M}EsTer2Wy=Z|tnJB9=MmiG!PE4OT@65F&Kvur9)-$_U7d>+m zLXdpF4$gU@V9FP{0_m(BDAE*}*hYM^J?Ll^bNrZoZL1?j%=f*ic$q|M)s3++zS`VK z4~hrxZR9zY0AjesG$o*d+#OLTdBMOl{bmi+{D9OaQ+m}UbPS^4&36b)y0Fa zf7jD6kZ&rb-#s39OH{wS(lb)(N*mA12;UNHx6?i7&jVY>7Z|yW5?&^FH&IJ;<4d?t z3NXNs;o!s=&|xUdCPN1JS&Li!0l=$q=gOxgt3ED&x|wUVdeX274AG%q{vW@7>uL{A z8Og0vFz_z5g|z@!z`u=o?Zw|5KegST?Bal@Fh%({xJEqx{ONqv8Qhr z;haP25O8iE6kv*R4}6xJ=YOQT3I6wUSt+l)K;7?Pa!(N>&K}H3U*m~rTgZw9OPLvU z+3Cuf@;x#Q{L*W9OjKT%^k%uXDAoP4!QO518R-{*1tb%<1yon@jfdNdi7G7#FU-5I z`nMGhlYFTOyW#o498US4pPuoNY5hBr0amA&VcyipdN}gH;1Ez^n}6;Au%m9ZaSU?r z()rX6${R0V5NSv0&J`cnZeDrSC9NZKoyqk(QRc4F>(uH$ku%Yl0;MS~^HYTVH0uY> z$MpG~T7aA~3CO(vk^Ua=2=_QCPZHj3GHK+tZJCOr!}3L-E5ARtIhWJ=g@! 
z8=bTDQc7c~0Q0iC5D93Ri-fHP&>Fd_y93C!27m`7|8Si690D%^-_U;g1|ZB;g$3yH zH>qFk4aiObkf4QJl|&7COTMPJ42G;vjH8i#1@c#;nWT!E|JnW7n-XT_cV5F*EBO>4IKUox0&6#a^pKn=SAYQMjP-y7dF`f$0clov3z00tP|0__ z^$QB{tCABm73ll_N!*D?U~-42t);2eE*SVXY3vR0)3wS(G2aLpl!Rz9_yI>)UQ4Ww zLBaZ(fnsD80SD#ZLWCC5GjD%=HvPGX z_5uI^#Ww8riSaX;#%k;QalJ&_A5J{(+S|1|T^v(HYANKMJ#e|VY1iI)-U}Sc;@JHB z$=;Bfm?X>qpbky~a?p4{U$Xp8{;wlCUJHAM3$#ImpZw>nI+4P=L}<@CiQjACZwrq9 z9_i=LD>9cK9aP@_GR$ifQJ8KXAyT2~3xpcmt&M8|{|@tiyBVI48wudQ&ufxm$N&2< z|F^OHkInr5{NuTF3k3=J=kkvK{ptVSF=Bz_f1S?%pZ{xl&H@-q)|jo1<$sL!zqU&j z+Co$aAZ{qu5!Kazchr-*CQXM|)^zwbQJHj&NqGMpf=G{;$XU|M62q z-v-{vl)vGULPCq@=ZDQ!)hWOWJ4wOc!}H5dToU62$bwAyp6@aXTXfwM-tFO!RLd1~ zj@s!J7*dIl%KG(WE7F(HCIQ^NM%NMnF3=PZBBJ*j`}}+XPFBR;V{|f53txT1yi#@+ z=cFdFA9K1Sbuk_=NCeg`CYVQ*E*+T8K9M%b0``XaN*QyWWSnjn8SfYN(7%+x9B2&i z5zFL`+%rF703j?N_g=x#TEJ}$obBaFago`;1MgfOYR->FurpMzLxE=UWGS z&kjI%6Y2m!xa`sr+6DgWtN!Z>|6d+!MnCXIn5JKT=(+>w)ydX4$%z2)EKURf-`wZ? zVgHdR^0I-&oxAmKMf0|#xb&?Uf10_50F>Pt`1@X&=RayORi}_Hp$y8i>65zfMga67 z1#_#G|Zs8&TN`pqj$;6Tibd$6;td(`6pecdFzJ1tg>g0oiAR>I&+%oI-H} z4kz;W-&(YFjTQjPsi`~Q&p(J}UL^psE`NbBy!*#Cqjwbk0_6f1yW(n7_a{1xa#e?d{4697l24*9Ks1 zt>LQ*Yg-H8;IKr#Um{x8X29Fy#RsfT31j0951FuGUWE>=h>uIbWD8q*XY3!HO7{S7G3=5(vHBf+&V=9@KcdJ6gZ2B@aw=E z;{Gqn-a0PIwrv-E6eXlY1P194L}{c;rKAN#q+38@25d_W z?vxx@*SNlS@88<7zGwZbf^*+<#d#j*5ofO>=SvfuA>F|5O}h@uzkHP9K}?IFEa(3( zAo;&LC$Uw;$F>^{LVPJL^Gb~7i^?%*=#cUW^+9C&rO*x0-N!d-oLwo0F@!fy{ zb3uNY=_ZiQVh>tZ?-90QS)1WAo_^cD*KJ-oa>6M%iT zky{&?f_VryfpF}B1WHx8-}L`38vZx83Ao|<@vuea%@_ksO_xVJ)!TI&Zyo=0M!U2l(c|FUA35>{QL*o_=FD~J!KA>C z@3JrE4_Ec{0@k^Yvkx3{f(M#d|GUE(t2GeL<_uh+{p&%}A3yp!Z<|c-IqF!d8vNFP zb{EK<`3W42>GX+6|F<#Ab*59cxCkk;ob$vi#RG?FE(l_niMJ%i7o1~R6c)<(X*j~7m)(+}>2 z37_+++0lW0FQVIOz;+XR)r)S6essmc2H11%lO+h1{4Jjzx=$2z66AZ`l~nTZ#i-Hz zO^GIKUT-7|BoU-Kwhl%`MAU}yG>X9Ke6P?9SF5NKgz#I1)4^acPWjVXb0$Xze*%lZ z<_bq)DOGR5ZH@KnQ;WP>7IrxWH|uXZevL8g%`t$)#%b3nbJw!QX)AO6ik||(tNb_| zCt;Vzz)+GaH79op9dU5Pjh7I$`DzkN)l=f*1f`h~UNwEos-A z<{G6FY?Jti#%%u+sZ?Ip(s7#)ct`uQPM?HDV7PxW!8I|P2BO6#Y7jNgGR*Y7`n*ta zoy&%MFK5TV)L>E83i5f||7#Ye#Sv)!d(Cej@npR^?cr;rB~xNZ0zW9AZoH}rd|89g z?r%Ojre>L##q(E1qktf5VO9P1{+F_vTP-};!1el%y}&#;*?!_^!H@KCyc~pW*8+^3 zqnnCo&d!~r!5PGJN3uDB`ludj@^245rYqLx*L6`uYzZXB#z$mTPGRx=2<5UHDSxyO zGdm!t%p-QLx2*fNCn>r%w;E&uCL88vHLAE`_jEBz$$ky~$i&KD0esZygjNSp|nUFJk z*Jm{E&G$Q@$8%=sf3=mUXe;-Jhu#ct?} zNRKRBx||WO^&5?K(`P6zd+LMK2s#g6kaEUBM2ax+ij_JVX7#v(qM?)Bh!8U z=+ON{H!4otzXoL$Wb^*M(T#bEM;D)nghngJM;0CnB?c3qyLXI0?gE`7TPtZ3wL}#t z9*>K>Gz>Dl_dEHZj2e(x8s>;o)q~7cy1s_7CJ8!d!0;aT#rLIvO7#JXOS`aDakloH7E`AH^}rtXde3LOPqGbyY0^)ic>Al@M>*zlPm1t(WBpC6}1`Va0V z;j@le51e0Cqq`$iO(*SE-hi>Ckc6u{bVt#XqyQExfV{SB% z8Kg%EPsSo8OHloiQS3{LlQt6lX5GMQ_+_59N}8CUF7A8A{ih7yXWpZnqBe8Xk$Kie zM+k=j7;E3|I`OpG1iNHhd9X*jFY?SMu95BL_I;KIpM+Ow&jFP2P-EYw$Xl+j-%Msu zV(2~USqiFO2r6?HFMPenZ)1N~BKt0e`n31t`vLw=*s`99=yTDA5^rm4pa87|PfDw5d^MJ;urf8XG_o3iV^AENHf>=mHR z466uWT|18`nC^}VXi=t1e-Utjh(Dk)Og@&9n6=ucu{KDTcHfDxpT=6H<5BQ%y5BgU z`EYu$ix7Cf>=@=GGdxRUYL2?IXUg1%336P);+DC^?u{k$i`3_aAz*>@DCDY4+_;kE{q3lHxJnDnOn7 z5^Js-p0QU1@r;UjzhSu7`!QHzH($m*jBH2IWq~5FU4XYQI;4z&b$n?GKT#a{EW!)+#is>S_OY} z`;^|H`I^UgJk6wCS3HNuoestm^|PBitx}7`8vH!rcR0YM)FmQeI^Xi@bK|p&y`U!~ z)QWx|c0Yf{YvjKNjq(S@iSIIB1V57*)~Ubf=U#SD^#dkUyTsT$fyPOYYpXL=dg9y> z@F&;Fw7e*6C|u}N?B?1hZ@j|Bc_8Cb0iZX-Rn~TAU*I(KSYU55g#8uFNJ9Z8r70v4 zOTe_V)}10NH6*=PM|B5vv3edYOw5_y+DaRGec_`E8(}xf$BZiM{PA9gJmaEod~h%?IhDvxqo*SKJxzhO?4+*0n|K1 zAaJ)QXnH9AVa6V8JtC!oHKzoNk0<|lJz&Awl!R=KFOwTws>9k-+Yzh7r?d!49Cf2w z;GD2~U1(m(pP#zK-6JpEfGOuP`rghiHrpGJO|j zZPYGM?Y_!F2iyK$@xZcwGhD2S+D+KR8RP^%at0UrP*jc)5ag9dR{mA0t)2g2PVBjj$J#;j{Z3x?6ooVD+ 
z=Jx2ZmSMZNWZzz>xnO!QQs*?-TJkmt#>mG3E`{gzCmRuZg*qPP9m9Txq!~x}r^1n=~m+*nY_?DwIyW^1LpfBv6^u}P)(va0li^m+aiRv+z-(8nK`d&u|@-!0ympNS_cc~S08pwytPlf||=~J)4 z%%5S;%|)G{5}(TlgD+y51~wZ(r?_l!95icSA}i%(7FX;h@!$ZmN__pGl6vfdugc?I zyz+rB(Xm8n-GJvm>5ZZIzBxa04B&iv2RB>}5e{;Sf}W^&%gJc+Ni)VH1;o3FW#S$u zeb_a%#0%A!K6fqh^!b9*$i04=t|gC`LO<2<`#H?QvRkw!=~2o|dyBy&FXeUd?nC|B zTorcv0JP^a_jlZpWR*>B?Li2Wcsc8L| zHd4JIm_2*Tl%GST$=WFH{e@$_*(!9kj>%sf{VN`?RznuxPTKU_*p64DG;$@y5;kCq z6hD2+aNqAF>vJXYNh|+rFP!?wIaISF5oy)4{Id#aQ>hXj*pH?9MSr*MBz6N221A$G zG5$wHmQDW-S!uovxqICm!M`qW-4p$I`mxf4vDtyH(B=3GLIM^XA}UrDEA!hyiL-Ya z6HOQu!WGApVkqSA?x(A^3WcC=+~4YF)ZE{r*V^Y*pmhw8pdSkIHzmp{li037@0XU> zhMIg}o9M|Uvh5FAl+0N4@FI1gD|y+ambwnh00nJi<>_9&y3fNt@wbrAErG_`mHAT% zRtq@ck9gHuS95w9kl(5lO;-tmXfbaRc>mn(;mr)C-fvNkl`;ve4&8?c!k_rWv*M>uTdmIr=SEP9q+ zd`$abVoBTX@a7*tiHgCvAiqeE`@!A0?Yj4tIaoa{L0fg^@f`9_F}YcqwKJGEnAYj} ziD!>j-)}VBh$-h#D{V9npaWYrb-fnHs6uFDy-*P$nkdYK(fh-IA&=5PhN9kq*}zGE zAocj21L~5WDA>*b$74l?{)vyJ5?>XP3K?tr^lJoPBywcN_QHy-0lm~#H(%f`smTa`(n%wL>CoSQa`eeI@9vX zfJc8TaAd^1;cb8Z$~~fRmqy#}RkkF1y8{jQwk4Qvg*fI5os8hR&N0U`vuj$=%1aH} zLloL3iAfzzpQ!qAXYD;EI#g)2w7_fX|Mr#N^y@Fe^b>gN4N2S4CqyXh> zpXea}VY-qx5WeoDF0GRSJF_gq8g!02FQFsP_0j?pMzg5wtti1LwqQ4Xf=|ewRSyC! zD@}}Fihd`?k&*GvXYuBA8N>C?-`Am`MS+lqSOcV1)D$1QIoB+#_XbacrZghP-b`7o`W390y;>Sa5zqZ66zxaaedkkG@Xu zvT`jI?{YRGfad^`CoB{uK@+2&Kzz)5^{~l8K&NYRBeX0y8iMdI><7JY0yE1-Rk$;|VJh~C8p+_B&V)9KYNc&6>S#Zm`k=!Hx zAy@*_0F@qU-)NX5Mo%%{dMInpsPpLU2u4e*t`N zQRHp{18F)_NVuJ;E$eCgJhz$8XRkLTrv@XuFBpOgBW&)|6?+yv@Lf#y}}NgQ6<1&GWmbRER5E)EX%JIY|+uYIeu-h?QJ6h(!SM;YZ?j45Q%gQS&M zEhGgTH($AjH@X=_-qVoxy}9$ccz6pNjrH6)^P*XXF&&+a$?kP(?`MA-{Rv4&@F_-? zEMslOy@)eQLx5;F{qZpv^bnCYtQAMNW^d;^F>4{#q@GW$|KX@(zy&8k->oK-_b+5= zyH#uYpQw5%2abiU7i8Fw+M4*9dOy=eKZW$Ro#k7J=wfr+xQA;x%(S-7Co`B( z7=FsMy)OlO4W1rh36KmZ+`N3YgO`IPeS9+WZu&rwzv|)*m;NAsITV~%BCc+O^p56I z#UaZeH#5=R{T(pq87jNqqrQ*t63MGyGHY6@jpQvYCQ+hh&EVezy!UEaaiW38f?I`R z#M=sg_qJlgFNG)b)TMW+Lu_MLEaYdw*9_*$q)H>VO29SFdV!sh2YvXc?k-pLt?$+S zEwYMLbZmjOj>!Kq%lrQsK4GE$Gyf-h+*Z;~sUNK0!KxH(4JYaYdurws*VD%3h8X7|e z)i5GHfZT^S$2ZD193dHYS<+qj*v7J>a_Dcun;q=4H<*ugdmj&A?$gyS(B0Ny@sTqp z)Hc5Wr8ehN<&p)BigouFDM=0gdYVWI5O`zeJX^K0khbMz=RQd)RaKfF0jWkmzB(y2 z4)}odN|$y^GbN$IaoAv6nfCtnM?&uYl6@~9m_cfgzd55vB3rA>hvUBa7yfU}_Fc1wH z2{1_qKo$%|n~1<7QF6r?&RHc}U|reK+`wfhH^@$aanjly4S{8g*F(_x9G?gEc93E^Kblx9Cb3DI_2LLRFE?H%K3IMh^#j&Cd= zzxO{EKpIcoY9?(TOQgs%c=W7I^2_W;cWOdFuk4|0_{6UQ>!5V>VBJkj;yCk%-~a|* zc?dnj_Ao$0i_Cd2X1WWk7}yl5@;xNGlg04|f$ z+&jliGobu9J@4n;T`w%@opr3V4B!3CmipR`2Db~BZVd@@ocB-&=fr=x7{dN~+pup} z^TAQwPUE@f%^i0hc_vS@vNes?c&4|z9emN6QbO&5Iq?QG;}jk$ExXG5#qS^wmf$s7 zCkZxs0rU7iH_)YGes~Pt4tjE{$s|FwAN5vmzNGnS3$Sv@p@KUfvh?mZ0d%BMDPBV5 zCD^&4@e)g2%id(aOm^e`mTNQ^P4!4S^&HSWWq5d6&@Z)muP>;1^KE{=08hP7sKck9Gew3a!Y1Ra5rtsVbY3#f~rq@K3T<=9rZ3v67AK_?E-@)YG!2%>K{kKb z2xoqX8MZ6Mu0V|t+7sueX<^`FQOieQF+cP@PaQZ!2_5=fn{1wpfJdU?xflGA7-ijq z2yC?z12AW`YK{xquICXv^q)j@OR22QR{GDiG^W|W=U(E%(3-JtQOA1OF_HxdZ*BQVFXkB-8=O~`ib^-b!0;sWwFe-aVi30QlJ`tYQzOwXzH%L3D|5528 z!Z{jPoW~5iIjP9phscxfCX8}aEW(3G78-d6+d;F> z%(#ZJZauuWNgZviHlzvq_F~RNkF&Y5BOsqYxe-g?Y5o9otdguvTUfPf`JO7ueaEyS zR#mO5!!eAOwA&dq3;u79KOd8Yyp{JL4_uJqi-1%i?urc!bYYK5sxHp8_;F`ak8;fj zFt5#QU8VLrSm+SX;va$8dPXZvJURLn?3;#sjiu<=)(}hY+^b^QuTmC z#mi6-BMQM~Mm{%kzu|F#&5=1DqTg|65O^U&xmIQYCd)K%@w;oL{hwFF&8x!6EsjPJ zcic}}&F&&fI!G@~`HMLl40ob!GIQsKzv}Yl-Nh364-th ziTcc(;ai}!(`8$bjBAwD^QW_;)?A0l4cY*Ki2iS9m-l`Pp$fW-1aFuMnGp6;qqhz5 zs)HGPMZB~RNntG+r5$fs{%zqKL;-kcDz?-_{o{8Zo|z3j4(M&XEb84S`V`8n?l9S*(uyqBBEq?* zMI#rm7Ec|p>RTw;dGs!?4)>iMQr5g zvbd?OZah`FI_wQ4b)Lx^t-zIh8R{0iN%7)hfnz&{8R~mqf=^9&*%6%Bb@MFH@em5c 
[GIT binary patch payload: base85-encoded data, not reproduced here]
zhg*W<9#r2W$%qSER@5ni9QMF&ESEO4zHf2DYjCIeG}cVMHY^8gYjD6${_5=BXOgg{ zXZ&MkbIJwU8~n9}?;R@gbl7 zTS69dN7KPZmRC^qoqczKaR;f?j}%gJFl}F%AWpObODKb(y;1*P!d%X21GrpU_u_p~ zAzT-&`%{?v0VzAkad4GCwwZX)o370sA>FSLm7%JeLoJ-+Y6#%GZ}!!E;TlwtKgU8f z7LujczVKqYaON47&A@qV#CNQ`$VlHs9mq!oA>~FW6iqfve+RLplj68d?QU#9gIPuM zDp+ts98X5)ptj^B#D(B5lxUZPenZKlU{^Z`DX9ms>?+JZ+_fLx*FlB2L{vft*!kj# zYcU#MtPBL(ej$Ebf8nPFCz>zra>|#4Y7E`W2z|GFp8Blz1oHCF)-=K(<%f|{1rZm-|RlMC^}d-`BpEx7V* z7klzEu{uG)u^fhrzkMx+*CY2>GhBCPd>y7wiwO(joNY%1XJ+)uL`?VTg)%$AWC>=B;A1nGssyRl&P_;Z*yJ z?%|B<8z zg|miwPF88!^pv+67;?$I*O0(t{**9;j?wB5{nRy4@AfBw4uUGJky$V1&GYiTC01M( zA(xxx8P99Otk~+m#9zt6V#!yLo}%FvsQa>zM~E*Ak|$rw^v9@Ua_0~Ix75!O|1B*o zIV1HO>SH=y&>y=H2)7Hw8J{_RNtqbBquCD4&gX z+o7hf1NALr!0IN?GZcn$^yX z4woOUJJ$1L3O^30S>|Sg7+lpGSr(lH_+$HrCH;LFMO}mwg-zpy#)ly?3Q(++C&O`s z`HRbWNEHmSx=*IRT02-C&1!KCn76ZqYp~*zpKLm}m%cW#sdp2;A*w^KaNN}saZ#CQ zS@u8G-p~;e8X12w`k(BU}B=R;vtmWHJRgVMh@sE@TAn{=tJ= z0Bf@UvDB?`1tpD>8=zLNveCGCnSSzXr1^Wc4>;3Xk(7Y?&4C&NA(`qVeEe_ddbgkl~UC_HmBUBnqwxaoR_DqLhh(Z_VN z(w85`VZznZc^0G-^+pl>MH5+4zKs3I1u-i89ha*g*dCT#*gr!)>6^`1x1HDQ@bJTI zE*54`<|l;@Y?kcd9)S6NPrMJy_HCdhkB(toN=bD;5AROcvnp(*P4-85GR-Kcnuz68 z601D#+8vuH-32;jU-=h)OYB!k;c-&pI3=oT6Q9K#r=zLE-dG0#6^- zQ4qD-l^&XKnGa34$S^5l!i7s$+;7(4c?%1Jn`xdrjz`r3l2wic%#svux)wmr<7r|6 zVF*K245R|htX&oFY@RQV)mA3Mq+8ckWzh~;h2_U8t&h%=8RdRmQ9o*VR~NHUMS3`= zS@p)}NSg4XIuY^a()ai3)6rfl$Uvr6%Cr91=2WD}1sS*nQX!IGrf3dZwrgJ^C4G}Y z74;c1c^6Ob;RX^mt6NZt@X5O=4mWAri`HTGHNcGX*KIO)bI5jx9juTkQ5a7&+qxXF zD$Y4>+~=L%V)@fhF-v)?ZLlN<2~Ag{mm^H(kg2rIPAY#jFTw!raEmSzh;>a#nJmjO zRo0R!?EA8W+cyO~RL6>+SwL2!#9C}yPQ?uff6J?){ZMIX9)MO&rTwU2H$Xc=-wP2g zxCZbuyG_?FTre%c-pq4N@))Z$>Lz)V6S+D zezQ!}i!4~((T2LXwJLIVjNYzJ<3oh%tdB_+r`gfb>B|Nw!*C-ai^h0Hl%M*zP3#vJ zbxvw34|{Jfe3>Wy8|0H1UEul_!Q*_fbckV0oI35?tuazZs#hgPmvb@Oqq24XM}#X6 zBHY2%e-h!sdonf|Tm=!Xs#&A_-4*pB5;l!+j!#Xu8XOh_HyWmMBQ@MePwy+i)yFfW zAtp>*T(ZPzJxo=x>8dH*LZDW6_w^h!^g*1Bdd`=H?&>@z*YPgJ*h1lwm`4?+`CReS zVTvXXlBvuPD=kS{VOnmss*E3ZuSHQltIBs_W#dY%ZoA{7S0ui1#X6*~r|j%)i+CuuAH=ZkJ|< z2d7HlO&T+rQA(aNnVLD|0@?dR6R!IpkV3ltG~ps8E+s;z;WO&j)0fFQ&SJID&-O;V zsmZkj_^y5>hxo`s>iuqtw$n`6r}IMY>w``QvTV0ZQyUJ%j;s+Xzh3iC;V;4A_2ILWoTjUZr`_)MU{l-`i;8cBy zA;B%vU)8l9KL5Oip7C59E`DNjw+(3KQo8}!sTF|+@BcxBYn16vPW-*<8; z+N@;eTXka9#A^t#|cIev1NX%k7l*0g zMJ<=EqxJGm(=~EiCG(MUcJUco7;F85!zz{DnWMt~GI%45Pkn3gx-Tx1?TvTi#Y7)q zG81G!ENiDJe(88WN&m(l&S|qJ5qwTF6^i!_KPs*V*lafz%hnnwmSI_5b$|^Qvz+n3 zDy?um2|ra3$5#73sYG|UozEFhqFkGDw>s;g2?7niDBH>x+T_JGH+;1NP5mA=xSWz$ zl~j>Za;W_EgS%_Crg)|lnQIHGXWmSDbE%nEsF6%0XxO62R7Oqnt^=M6J<52ORQfpg zF!miiapCd5e7I(a57(97v{p)eSfhAYsjHg#Ie*;|BeXKwt#xTn2ff8KNBH5<49{)5 z`)I7SJe*B4%PZOUn3H~X+s4Xm2x{T<+*8x+M*1kUyxYF^uH>Gh+?EoUqp}hLlv$yA$$T{0|tpx>&@m$PDhe z?2|m6$Drb>kD{9#(j!5J2c}e|8B;`a1cM~{+k(>3Fxzk z8JF#MGw!8BGj74R?7qv9asX?{@A%z}d+#?hZqnb)xKF^0>$?l{?C&+x+BL$zNY*=J zSsoSNc+M;s|5(PjMfk=8quyct)l>OirVE#9#tV`x&KR7nQuMfhE-eiE99+K;U9u=V zvzpCSrCX~wap}2*@t87$j9s*e$KyqfcXFErVRMlMMz#Kt*>hAuyt`_2RGYr*`KfD5 z=c-z@RG&ytoXNxhOB|~m}P4ig%WMv#5>&WCxq$I5?ibQ*Ny^0`Ea*C5RqDS;TDg8as z`W$HsO|YC_cVGQ>&6Iq}&*VgLPxjAt1Z_?Rp;*LijTJ8L7`_haU1=b+pq zF@xeqPLiGa$3p8NGw1k7Jw}C3UymM}(Yco(*;_8?34PPpo52AxsZ*cEC`n5W@3A^N z(y~zdp4VAYSX0cV{`IZq%yFV?r}kN%_#e$1baOU6aeRxKJY9dJc>D5_dRnmdkQ#;P z*K7FgFn_a<_w{DZ!AN4}y{Fbe`7VNWHr~P;!%^;cv%lc|sl|1hPtDeJv!;1{USXf* ze0vqu@SPH;+v;v~XAv*%KAtD5nq?u8+Np|J=6B3nRw^|_{Z7Bs+~|af-Eya@OP+8I zC5U?6JraoNd2pv^Ql@y+%B!b`mTBn`%8%PE1}i6~d<|QTuVeaCRqL*@Y)ab%tCnz~ zU!d(&BWFMq9vT&VKb+&*c?oqn#ETnU5m)!87q?q`xX4KT%xLWHy85>uizD;_9H{5d zrbY$N1)$tK#b$;DKL(f#lZ`kRIs)mEEGd9JSa*m}UvW{}en2}WZvD)oW5jI88{P9P90@%FPUskeLI@HtfD67g6)l=)@wNj(|d 
zQe&N$TsE{RhmxwF9K`k|SYakZ-ubaq1anQEE^bvcj}Q>vo{BC{7&k7g(b*4RB|@*L zKPJZcRc19z6GlLIN=E3dmXAn8)I!wr5#9~zY3sN8d=QE*sWdAVPMA{&i*0$GR~}PL zWfOXRn;TmA9bv+o!H!Ae_z~Yp<=sDdnS6*>4c$R8?kdiYZGidoOLh~Hf-Rq_k^$sl@z_1ZlxF6 zLE}@tkUZa;!!3;~p!9YZc}FlO8(VftFIZ)QVjn+ux;5IyH70S8nmh<>y58JC8&5_< zCi^>1tqXp#cQkhc7ghaFP8+|RtKF1}-LI-{9+#8pm&`*GlByd=Z zn5qAfuziGv1OG0z%OT>&8{F0WtDg-_#}T;ITBGvp3rw!@4dD$fuVh+80^X!-W|QVj zXUuA~*=~9Ndh6F&&SweDWa~?LDO0oD(ue*@zC-_{m3>pE%Z$len_IfgG$)}H6uMs9 z>OrKwp+7V59jtUM}2`s^{Qm^D&f{69?>5*u)R{~XNGcOwgg|oZ-+oJjykzLezTp&b;p_d zM_%T^mF{T`wX(rB++hhJ@+mO}FbGnPQ?~6Cj`p(nt*M=EW{JEjn+E*91-PniAi!mzxio&`^bzWd3AS40RK?fR z-ZPSQ#87YT&|Z=#^xYFlKQ~Nx(kSpKqlrG5WEc;&(?;Thb?&pHWVE&J9OqJNRNgVa zp)*X)Fip>RJ$#zyZH#MLi0t!+VRAAI{EEafcL2;edSO!$fUnxo5~Je9?P2s81YF@k z{oNGwSwrtk?t85U+ewg{+`U&Gb{0%Q#rtsTHjEeggSw)Y1O6 z30lBWl4{|I@(PHCax{OzBZ+!wdjZ#x&<3aI^`M1-NAnt7TV~#oYT%J^5459@AWN#T z%NrrR6Fg8b7OYr#N%Zc(HH}_TIz7F+uEXA|77IZRPx@BrW#d?JsyMi? z+&d<;D1mluz2P*wo8NfL(5rd>UFmr;7M%vytWwI;eLeP{`Qs;7ISWGvI#^gvpp83R zR}4@=gF{SV^NaGie&gjh!`z#EIwv0R*`@?6E@obPRlOpomsKlp%Ou2QO`K$D*ucx0 z2+H(9kYDWZU>Z~L%5Qn&l1@SFxm zRCVqn;_6(nk)3E1-=7xTHX?D#8iBjH`9LuSo~Fg#B3r5b!^oCB#Xiw`cRuvb4MS3# z_ho*|qlQ^$JHu7(Ns)I*oUFMIj7(s37S+|(coYDp9?N3aeixP+_XSVU?tFn??W3)o% zixSr*1MZM~q}`cJQMrCM49Jw8Wfm&R8Wz#q!+zNoR}~+THmqM?`GZkZ(LNv>Y6|4 z_Mt|HsQd}08S@YBk>*W6)mh0b+`LUP7Qd1FT8#IUcGE|zjM;SWW(|r@%MVbIgCNQR z+fLKw=VaW*PUE+^+0Ohc=bQ@1CdtE>5Kq!+$iqE!*tQqDlX|MK{dTsg;(yjpqgX2?`M5gPic5+y}!RvJV`{`$gQ5pRIH zKfoL(^6{pi{uV-{ow#qRK=$O_coFtoH>G3}{*x?coI7adwmY8tpQwp_)68S$EeO;s z_?{l=3^O~AlSco+(K;sR?X+cQUB)BoPwf+ZB1dDj@AWAKEW6(~|J5s6bf-;U&+$y) z1mEW(W44Dw6k@L~xjYFEs`}QXSs6DZ*?x@DEI5%^Wgp1|Q3r}6m z2-YE@TK+0_4dt>yFRJsxaZ0DkzfT1cyhIp!Eab&o%MaSlk`SDaNV`jikLKspr@MZC z$1m&g_4P}a!;XI7^4#%TWu5;z%%xG8@M-DV6D3#u&Mmdz!Jn5SIh&(jRh^UT^WmFL zGqBh)n=}tMZZC?Pq!9F-Ue-VUVov~LoK(_zKRSt*@Z`6}i|TvT*Hxc?7fBU`ELtX% z3q!Kf-*GGa$7CgJ5K5`w*&HgEIX|Q4vIS#$Y4fK2T?p{cXXuQxh zo@+l1Ds5kSwcEtLd^vFl`c^acqCcVTAhtw&xp@k4Vr!rhCNU`CCxLnM{derW6<=bL z>!N>}l@@ka#XXFk-t6zfB-FQzp{r zI@DsJFGM+YMBpb2y*|TA^<#5=HIgjD&9970SuaT(=iXwKtnUb+T5P5*Byy|c%BA1#+$P20 z8u&@R%2$~5ZCJ4Pr(66TiIeh6pDyFfj|y&{X`bdK2pA>olqOJ%NUtz*g=P2=1n~rL1mC2B-VlTPAghv|Tuo1b z)~i_WfMGwK`Y%?NfbTJ?PjR#xW)Ii#KiO>!+kzi+ahX|*SA8`4KXzM%zuRps5WDTQ z-|e=yF}2G~fhQKwWMO=Jabz!fS7^k$Yx1QJvfik`YECNd>9S~(AeosQ7Bo9X6_J5G zul&IH`rB&C^XgsWi{fdV3L%kP_|G4-W)fuDUl22E&)j&LGqv_C*XStO@U7>V5d2ea zeM%xZq&!)*3(`BEjNE~%Ju*u$9yhDqb`L<7UAM*(ZEk{nI-iwK`%>#uAYNwwcz3(YTM+0t+w?Mcr5XU+Ez+=_Wr+k>cWk*q;IC+!8OZ6U|YJ5<|-9U`wglvDF^Kl*s-{&seUv!;ga zE6;@4R$js`12|HP)leG3imp2ug-jm( zPq=B9%#~46lJO+dgnY^C$F^Q8%skF^?`EX3Lx^H-A{S0v`>b)r#gwe0Vu-#$=It-I zjA4609p?bujYi9qWqOSp6OTXM=ZoOT8WC81ERxr|>A#xZ*B#^Ao5;p9eCE1G8a-OJ z;Y-$qGWLkKo!GobUMs40ZLQ-$JcX(DwP>+iy5P;>vj2~{w+xH2YZtIpe2@l3LXlLu zLApdrx}{4%KpLqbMM_$vq>=8BZl$}1?v9~{9_m};`@HXd_qX@A_x}0)^Ehn~i|Rk7xhlSp=$YKs<`OPnjJnVr`Bg7L zP|2Q?bIR3|d4${iG>*fvy;v|rFOk9S#9o(}qV3IKxjH%SrUW-hfKyOLLsnS+d83Z@ z$b!m-M6z>FQt?VP&{vBk*#wxa=3%20tds4^SI@9B$OG3@9Cs^SZm>yGa9G(!vWtFv zN2C;)7R~Q-2J5F}MQW%jJn82z6!g9w|Iu2u=OgSh$>FJs@-v2;e1y@)#9i8=)1Ns$ z3OeY0;HdseL*9!v*c0<}r7Z5bjoXuZZ9UXmO0ji-ay<}FbmIELwCJOR1=EX_hllwl z(?m} zNv!tR&83^ZlFpgn68zRE?*$y6FN=`~`w5aBd4yT`PRXSXw321bM%8P4`<*ow&H1J! 
z=~T+WPww0VR#jwZE=O{uFsIU^AV??0bF}dI=F%X7uMC&UQPY|@H>@}-YzQ0ua5y~l z?oXP57%7K|DYw1YpHB4wmU?-Kx+flgiJYZ#ch)?fGMGlME$BwBcoaSDJ?3ogZIM8)9CcisvAg@tEtAI??H(iAovoxwnZQ$X<)gnP6vK?DQJk zIG9F$bvLn&7!c|c+y5=Mn&DS)jNf=_%IG8SIt=U^u7A6DC9wBUD?#{qiqpj# zO@vZSXG(tJaA|gQ^7Rw?C}-VF=$IuJC)Q}!WFzQkKE?!HzlE==@_t7sNMfNQPjlHS z(sg{9*p4WX?s&fSeC{14R5QNv&NKHCBEgD48fD$mFC|ohlwEBFMC$DAScyuHVvQaA zRyFnR<5518L>rWpE8$-8j9Cmu zDVbjXJsm$6;9uCK#EqaDu=vP*My;r)?^!~GJKePn&Q&n41YnSp8o&r`O1Ib$ zOchC8%zDC>Wsv;7?QymO)n%hU(6LUu6-RBL{mkFO zTF`-u`A1lr0G!ZmsvvTgBi2cCDyCDs-yL3c87DO}{zTLwaKDW4SQ^ zcHl`T8f_-~D0x8FBvF^0j_}*zmvi;D4xh}3AD?L& z=^2bR&!}*_iGS4}Xfl~R4uYB}L|&bAG-qa%xVCId=9zW^f*Z7-_szKEQk?|3YF^o+ zzIPMftJ}HHg(^$X|BPmW`3Jsr(TsnRZDSocr-cQiG0rG$KJ*olk^}-JcJk`$Kf>^3 zRQ3TE`P(EE5^APp}aHhyIT34_2l~A@WKqitxHp`r-NKn^dsREy5U5EZT zjF(?9F3FGg?ksfM7QZv;R&Iq0j@swI-oGpaw$Ury%BEIBr5|plz!w5S@=z+1A>4=$R^Jv}(F$lc>t5@GA24AI+U$l+;Qfv} zecYqoXXr?~#jS-WdxOa5nLUT6(=UjlEjVCp*XQApI@L*019mT!< zR%t<;YhR*0xfLd1z3N|<7s673sihbF3?Vi7m_P0Cds z>8F+Vp7`5On~1p&I&llyC?0?N*nv)n`k5Iskl4c3Kfw&GfTvlLsXcVjq+uU;6v&5z z+x1rh)+fHl6>18#n7RF!Q$uI4U_V3m0g^j$ek^+zq%2DUQC0^nLEpJ@Vorq4Adq#1#j zC_)*^t}wFMj_Vi2Ue!}7Hb_oKMQLpw;3Ll}Gc_sq?|xb??@{R-8{nsP`gcFALK7IX zNIxy|R3`Bpa04!xSwGHMY2Tw(07I9w8syOHAcs~yL*~%hrb(F-IMX#c-mC4zpOy@2 zs(wT37m1&u!fwX<@6_hAfJ3N3v?;tWSMK$l@Ai+k)}r-#edZcAdd`W9b&ek%^vUu? z_wMs~hB@Sf4yqVTS#8r~$ZFQE_uTMp6Vjth=w8IXOt3m$j8R2AB*#{Z^bpq=OZ~?x ztaFv3a*CG&+^qld(@r4$wDjA@NlsN{D=MHQmZ+<`wlT}jqJFBVXEvdAG^+K{Cs8;+ z2O-j_Fp&lNom<_q;BwHW1c8pV(80QrtWD&0V}*acg4B8CANds&{!#V1oW*iL$kNJ0 z5Ejx$TXBQfj{C^nkxE(lXMOcIwWzZhKK$1J&07)wu8_O{rr>>}tr`zv+AD*T;i#?; zn~dOW;`4y*O*<#ARyPqQ5W^;Qj~Q%K5s>p@*s|}9j`ePOY=c{OY7|C(Bh6RS(%GU>WxzG-w ztepp>j!IhD3=f_lYK*}0`eo+7EU(eWXe)D@puLZkHo&F>b#8rrIB+o3W2rgr9UKkcu3y#5YAPAl@CE*9_zK=J6c1xFB3q|6*Z5WHqaiuH^~w1pUYxh`I#6y2A>dx8ZTtG90HleIfiM z-0O}-9^w5<{LT45?`{D)aXXud%S7p{Lm~}BRY1`y(y`teL-u>VyhHyPoqxs8q>CyX z>1JPC7}#HXlZ?vFI-Zw$lU}Lgy)F=eM`6V*x%^hG-U3k@mEbwWe`li}_sW4tb-@SiWcOOGPx2WMyufX7U$} zVNJ0>KNoMBw22&g#nxTOp{GaaxuE71TjRE?YZ}dH#D0$`<@aJ!u!#^UJjqWna>c=k z)B4a|W-4pG%u;VIv~7%>f0S>4Yl7{oM^=A*zmA(w(Zm)40$g;EUN<(UzkQd9;r6Sk z&o6WSvzZq5$4m=+FM(@{Yw_8Co9Iku_qd7P{%14oz;Muim}vN(GU%D=$^P@S0k8LVoW(u(yn*am*ionXj7sLJ_R&{8W;mgDtn}1Js%s zvc#t?=V|g@M>aAP)~=_^A*|fR4*1VYo=tfVZ9CqT^fnI^{MCs`AN`J)u3>6>+N7B; zYp2#e;UKiHwidB5%Ykn&=+p>G&;88wmc>Z4*eFs;>x!DpM;mP)(WHHv+3uTW7+sJU z>8R3Eyo>)1!7%dPTOI+95@Jjvp=p4D1`f>;Y-SS!o2ADTt1sVtOkE@z3e&i||8DF6 z#ezkzizk+C3C9}R;B>FXSO=ehqO`zff2MAGsIXf}&#O$0Imn=7dUQnlE1gUT&uM$} zfo|{}2K$00julG@bg{YD)b-o8KDL}Ou*l|JeO7?ocqVl={+V-M(FCVZv44kEw2hhrK=$B5o) zE6*pG5o6B2f{Jxn@ksY5R9K;`XY-Y9m!oRM2VV}IMq+|e6}HD_%l48C>DMeIl?whT!-dk<8KY-eRNYWfRY^4@BEaN z!uPwHfA~s>cPlfh2ch9Poc)C)K3e1;J&|6DkN&e_h7Z=d zDMi| zj(RVA%f}s<7u(s4`x;uvp3O(zg8p38jRUX^a1>Vkmze#%P&VmQ5r0d_^t%rnU&d^`|-SxN7;+tB#o=JM9tyl@l4m6cy)ly&i&@0!lr0#}~Rh`m}}rV4+(hi53KOpdVNd zc%@~$g=Q?p^mGc#?Q*H2eUb3>v27ori4IM=uP~cjmBnZ6wZhZk5(>G3W1RvNOq;BJ z4g{PYYt;DsB${*4?>*=TB01PcNPft>VwLErlks#(F?t#6Q;ECEJbYCN|6aoN`ZXW@ zq#ZVPpu_YPI7USW4VY+Ix!dYr%?(daLhd&|xdofWxFN#n&7&ARll=FZZzxQYK50~* zvHvcJTa1HFWM1WgkdYT9rDVAo91?JSvAU+`GFRD_%#9{Z-YjN-1D4)n_Wt8)yQCp9 z8uJJJoQUns?^Gzy8H-H9{C}|2VaXHbAi9W~#Bq9y?OQ%thrxfumBYrNxzV86~3eN)hPZPS&v$d z02!e=(u)^M#v1H&ccW}F2_g;gp4x7Q(T${&iph|!ype)+bG>KE7rDm=dc0c-dlx8e zqsTm8?x2|ZEuya2(qjftI???Z=kLU2MRKX;&VGyc4jMiCEmwTajU|3FM%H?9CwD@l z-zXlmaE*xW6)n^EQ2x|2pCybHy(Qdnv$lfq)`h{q4l@W3ZnMug$donq#IPzqL%?BB z(wK(Weq-{~vfnhs4%9_o&(kDYDC*lf*q7N@ja>7kNnTsDzG^Y|#p^&}7J~~{@3;vC zG8ICS-HT>%CYTF5HFWLUf6jN^s{4id?mSdsX$C=yUM#XM9Wtv-ogvFxTSfUN!<7QF zfz#5oNk*Vo9weYv=!GYf%!WK-+rgdUZcuVF-MZgxJ5v-IDXaQshUEm 
zAY;%^y5i7jrnmlB$h_DKW%gy(VlgN~JLvrx|FH$$IK4?nI_qZRE6ZQ*O_lf5ij%B} zhbzcUR3ghz)(o8wd4OthW6+bKIp7lizzQGSgg}1gakduBh7Iv>h7nYb5rc%yjDnG( zBVF$bM#x0a#Czi1AKPEWa4eD>|G7RNBZaKb6rZTp6+PMhlM{ApCb0_OJ-mu|ifN5%blqtm!9iy{o3r3gl0$hgZ60=)oYBdTIojn%H9=^i zI0c2secEsdu|+B62qI2y8`E<7KFguzlWd2v2^n?MzK1DdYiI_TrIaW(bo76*vSEh- z85ifM6p3Hi6R|{00}49In&7x(B&Q1H-nFTFjFFsF<^=f{9+c&!?!Q*V&``t)MXcUn z?En0N4FEyyY)UY3gw>m29;J@*@53bL-@Fq+Czu$f5AlxOF{Q!57V9|mzMZdv_na(o zD9yu;gP-lMm6z?Yc_zE+`|UsXKTamM9Im+5U%dm^B-a+?>Kf_9H zY2*_gOEP<5ibNVfYpKmV0z@3^1Ff^6;nv?P{*BWf7xgh`iMvj1<;dxE+j{c@r#002&cXgk|n;tEk3~O=n0*x zVDwBW)3f~~JqwCaFCEq$>3wy#7`2yrHGRu4IbL}8nI=r)4RJ7>jdU)q9VBxfk6dH* znezy$4%>7+U3Lm^@kl1iNMcv0CORWNSj_q>Wy~{79*=vz01cuwtuzw)N)Ftupb~J|$Se?4wBSc4AKn#XM4W)L4;sXLTVW!0Z3o39=;IFl-Y~p*Gj{VR z5$5t0Be|57WM4jwFC~`Lti@d_0JEsOW>7fya*!ukXVIxASQoId0WG6F;2G+>|DU*F zw{Oc`Xcns;k4>>inxd9;y->a9_7Lx@DE13>hHrQevDhC;DBCJg%H55j9?^x5zg zwWv?vZ?s3^C24-rzwqMk-vxhp9}Ddbp2-)VqlpRolV*ivSfe|?S!%!M#8DEovA#aQ ztG+J3ZhKojA9r+6w#X8)?@)5lP?49)?2&labm4(ug~USh_Q#N7skoc+M3GCW6O^6%ol=6DPk6YJ2DjD6pmli~Weh>-ZB+|c| zg_$ujR#|DkpPn!99JSYjZ4~c|Ub;3L^;KH+G|Jvioe>n=XMKB{FDz5kd%5wln5Fsp zIL!HWecs~yW!)&g-a^oYM+L;D?7$jfuh+@MWUa%^TtZlkYm|o2d8#wQnZx@5W?p~O z+7WrYHyK`;g^=Mi0)WF@4y%o#tdy}B)|2Gpo#G8j+ou{wgUdIqTtVyC!tJ|7mJ8l$ zgIjJIuqWLQf2FxFF2^R^*E-#kl)V*}-hXl3lpoOw)AH-hgh;L?3Ws@KTxDxt^D5jv53VTBs#m1C7|~ypVbk?a;EB(Mje|WM&a!-Itn>#O7~`8 z_E+f34;JBktPHGm&ai~o>WFOr=)<+!`oYQ7NPN63a7*!_)7ctYKWmwH_QHE^bbYE< z-J1M;9j&Kp^9>-Gc*)qw-%Su6La=FTkU69TG5xHcwFua5@tx>o2Ve*8Evf#`Wcr6aKxD>58Sgcj8Ws_G5PI3^0 zZAqBvK^ylQF*6}ukXvqj>)yWHX^A=Ct&26Ug54)?2XmH2nj8qcJ%1+$6d`tnYSvH( z3tBAPryuN2SDg#LSFd9rdZfMH*X&q?v0uI28pmL%EmCbwR4`)ub4~<*#&cy5I)0+8 z|KTaxQM$0eIP0}jH#!PIPD*8A^LlIcw!FVjzQEPTpwyq)5e>2p8cu^SGA`~~P~-tOO&w@C4%E=_o*>=e&FA;%W25F=CP{7bcRkvAq54 z#2_#pRr>ncWGkyz)RzDZ9M6Wt;aVs9eT)T~eT<2dxlnJBA9qLV#sk@uES$C?93ca5 zr}a{t2!tzXt~dBi8^I}yI?t2q89}pZ&vlu0;aUsWNlH((0MDGGYH;n9kj=@*c>C&+ z%)W|7jmzAXy758beT(EhZ;cb$x~2uYu8=XBSx=qyBkjsPFhThH=q-RNn>(#W&w0@ zi{c_di#)`9pU8v!J=vlN7tY{v>-iI-f|96fNanV0N+tAYndd&m9iOWEl28H5#F@KG zh00?;^VZ0Ij>t^ij*E@3L0dtbNXlh_r*Get_-f$zUWfPf3KUw0s_p=7Szk`bvrZWA zhsV3E6+{IBcw!csfm8n#ik|uvza(oXGr1SN&!?w1qkRHN;$iPUq%jvZ;KWa%6ry@M z&^(L|dd}RyT&ZW!+g0#|$6Mc5koJPjOV3n-m8z&u$E`}Oj)75~l;3?gC+-K|5zE9! 
zvf0(Af)nw>+>OL%s=;sUwu&Mwiy!c!-;w)7g#(k@-li6bu^-5Yfu{$KMkjkAA*Wf3 zv}dmi_uX1Tb^@G^>*p<!sJGTv95hD79-Tc8r~Uj;t?MZFgP~~G4jLC$_2lw}VtzT4{#rpg z8*GdM1=;2RIIDL-Tc@*7z?fLw#*SdcDA$wsZeOe2M*}OJ#_%mtm|Dl3N1aZ zRO5rn`iZXhCcrMZvx%x}FZCc_b2(|@un`t8C_!n5odKIId%oC(d=3B+u1cXicpb?@`Zg z(o*>Kp1p{FsQ<8vsg^H+g`SJ;af3~#`bix1{YvACV=@awSAF(R z)zmiFvqMz@#>4WtUKr-vc(^erETn2rKniBFcY~K|9KyGbP>QAFnoj%kqHP^R_m^$^(Qe)Ge#97Uq1(bf)IwG9_(Gu%K5|-{ zWi3+N``v$yPj4JV5(D0!EY`TLe?dX)FwZwx_7vJ3^!=%vmBqGL$yODMX-CR-eS_@I_LYZ_>KUX=erEY-`?9Il zVJ+fhyC-$7*Nx>Qn}534TcIuA_|laJ%DpdRP(I$xnX?9sbsOBeoX^K(Lwj}S9#r0t zk{XYZ$|@G`3$7ex(r>us;3RcI@cd9{w!lKF9z{zM_j#V>6jn})k>lV@L-M%IETyL z>`rAq#)6ak&YJ-Bw_4O~tn#mS`Up*P7zU=COSFWYO7i-ol&zKbklZ0{r&o_r{doy& zzZgvV19osTT4KE-Q?hWJ%q5cVwHL!{t`eQhe^`*z{`)4~CLHCb5dd?cvAR$7qN0Z3 zJ`?TiSq}MvCvbrhL0+Y5%I&pAo*r171}}%@7od)@ANn|G4n^*1q^vADePR993N#;Y zS0PBIGx8)qw*gdnK5=w#O3rC=QYvwNUB8^09mBW<7$o0T$dW2#*bnh`X^i52$QWe- z*lV|#i3a;sE|gp^bpwXrn2@6gR-2X{#u8ml$Lm(a2k3SVZ=)2x|1=0>C0nn2z{9bvu_Tx$5;o9buzp!1+DEeQmmfbLcNSm-bx5=J9Si3~i1C zCIU++VAevC&Z;B^$yvF!%dLabPt!%xwh15n6;d$tbzfrP4;Ngi|=|cvIr1>z@D~se8ng<)Wc16zO ztCmNcMm_U4a2f+gIA@ahJ|Yy}PcK{Q%-5T|=}Txz4B}LezK%IxlKhf#Rd4d+<0Q*< zWYyqQ?3S|7Hh0lV^upBWSt|CRt~0jT2fknggnO<4q!+^TzNd#?#@RhG3tJVQx3ez7 z2_4lG=-_`MbLyCVxO3$n^mV-=DKi}c&ss6N?ib%*K=|9clHe_ z64Sy0V+se=51KCeZE=)WHBl40D|*E{T}^3Hp`FC4@lK5s@Ri763o-V+a=;Od>n!CE zy5CW0y|q0jr<4D*U}!S4Lt^DYQdE!a3@`eFyJ)Y{=Ic_PjZs6k+o*#&B!@;+>r$`B$VW=vi%=xX@utgWXb2Ps6L%0^%-n5rx=O zFrxzK{dlX3%E3-^ABlRwYb<(PJJu<<@`8;Vzpc^iqv+n{8P3O0s#Sf`j1Vp{Ez+E` zK~6tE6aGhC=qNLiTdDZVOphrCLdUrCC6`iwc)1j?TjPw$F;l~v28v+Ycn*~`e;g3zdSH{>U(`=z)&SE8OU>fv*GW{YyvU0&8+~qCo-|OLLBC(V?e;bC_kF@u z?#2jyWV$KHwY?I}%j{rZw)}F9QbCpDyrs;==lQD{jylux1|9VZmFl zSn0NQRev{itEO#o{knW!7O74vQal9qPF6v!>hMpWjAheUjnc$@>i6m_w<>LEMAXj6 z(Y9W{kwdy$!*{tTSISdAy~k!h%uDjq1<1M3=DwR=NgT>M;QcdfyN`z&etz$(;C%|E z)KC29@CrKjxov%-RVv-R-)q_Ws%Mj@NIFMlHZ|kDzF#7>w{h;9!1Pe)%2xH_R5v?H?2}Ev$-%u_xZ~Qq`9XWS}8ePznGv$ zgMfphBX`X#nwFgw*}YxU5Mhjb#1zP@x^wUPq507bWWE6+N}gZM6;wA4lwnoB(Rn)V z*I}TXh#~-2@Cg}^v3_d-&{q5xwIV8zyE)e<8egEA`9$xDh~# zT4v@?xrhFKk0{vuz_n0*n#8_~)<<>^X~LTXvo{0hF)(A65I;-gG*eh(&zL+cFt}!5 z0yx~LZ^D3(-vZh;`7B68{=P;`jJxiljQ}9y2h^HH!xopUz6471&IhGKXp|4c3?0~9wB(xJM_RYefd}~^5+vJWJbCxeFa7hQhmu|;o(+QTH=7#JJCR6xI)euY zP~>Fe+E9}HPas!%%BURAH3R6ZA@qK6x(?DaFL!${ceQSF?%a9VBP}kfw%^>0M*|az zc*ch$PC`>?(3vLhBlYLYzO@pBS}(ecDNz6Z|9;usLqiGVTt)+rpYS0#{78#|>yt1p zYO}>foNeL1CJF#AJV_FK*SA{3$IlIJ5fiNdcSQVF+ahb($P}#P=eG|^|Nb(~UufnS zLX{El&!+RAwyDm6UGCZOBPGvIVcgTXn;E=Ep=N5ajwMJlR!vTQ;f;j1}7O0K9RT%?-^=vP10l+BLKFx}br&ABe^M1Zg{04Z*#Gu!{U876b4dM( z&Ca8QlrQjK*YSV+=v_29O7K~XN8JB>;qLfMi7m$J*q}!U!r!=#73dS_gZny|5*it$ zCHskk(~=eY3CYKMx2LwZgeidBbTB>9RCTCUj6{W;1ErQGo+~tQUQMn_Y};S6t|WWq6uOXbk}H zxfE}fLa*oo;Mgn^IhJdnT?Q8z7@b`Kc(w&loi0jmZ!Q%zRFap~h2WbR5eh-ZfL4T4 zP*gP?y(`Zh`ni5EwYU~eVF`_c+xNG*&2ePhBo`-IZU09N>roJooEQ(zB@0w*BHbIU2Dc{$E}`sxU`w6_08a@ez^xd(edauEkuz?O)S$Hg zilH*cpAeWo47USxp1_5Rtb^lmHn zEU05IrTOAufGsPDubV?wU~*dm^Gp7e3X)Q6TvsK}PY?Gl-iq;06rWo*RS9 zfCY8b0w5aZBDBoZA7Xh6g2ECm!GInf!pJV{Gc> zM-uAQiU=d&o5P4eWY|>OI*IDjao+}buS$UmkYEW2V3H0}&Kh!*pCJXTtpK{F>Q@Uc zyam_>r|1M9i2Y-Q|9jN_mlxZI2CP%uKVtaCF;NtGm;6e>5@Ztop=wJ?1flGT1i9Xa z@!u)hDn`x`ox*|^eUqFmN!To$H&yvi}wF*s=h=wN&x)m9|X zN3yQ% zt=@JB(5<{yz%iz}Y+3hX&~5o+>ZLBFp$1t(aO6W&oMs330R#Yxi5}R1*kVmex>fA- zTpq#9?m zucC)BcL$^z5?SV@yoRB-C-x=b8EmB`Zt04@2WuW@ zfCtP}ShE}~;cRe{uR#DkzgZ%`F>(c_4t=XA+Cd2-J;!NVXD-EMX^4ml5zz~+Y+^#Dyv6IGXf5;n7 z>cYyI?rfYkyB1WPwZibWCH^C20ir zzq$DDQei!#c7g1b|Gd6zzP(%mndVOLJ406yJ)6B>G%dffP2cnT`FM`wyJGdle7UKj z#IhPxIcWpMgSW2N?tnu5hy~!~B_rnmsdCAw#9i;9Wcngt?Los@KC=BU6K}38xCMGD 
z1fBAMY^79rZxbJ)R<(@NDS_WCiC*V>`RJ;KPKy-f9te9LJzD_GY@=T0YAg`|TI*a4 zz3g75+aL29NYi5)#ow)=9|fSs{sSWKLn4X$L`6sSP5G)lH!e^};$$X_CZGB8#n4ggY%O&0_d65r%OTGrd= zh=y{xYGU=6Ru%nB`t)}BHSuIv-dR(jaE$z=KF`}DIR@JbuwL8}5CS`EohTp9zlR?G z&6_Yv0|pLv{*_N2o04xWk^tF)^t{`F>B4{EdTs&R`&i0bO>U6OyzVy$DwzcVQBn0# zq#0|FYrqu1o8t`t7qK09gNam6E%IA|*<1Juc%{*Sd{q6RuIdCSH5 zzn^LGe;!4N*pZ#p>#3vIPQ_^9|Go# zO}+JZ0T-L6tJH@&&^b>aN5-De{U~1p$Xp?p@*73&dQf?o7x}7B38Ras6$4gWP81RGVG)c+$Swy5N(umS?HN=7M2e%R9R@Z@HB48 zuiYq5;KPNpkGiHBAV)2rLnwKM%OVkW^hcF#3>3YQEn?q_>tV!t^+|c8@gm+{W!u2n zYwikee5*_Ii6ax$sf(O&bUdl*Q*9pDq=jd`#q?BcA*j#8%4aB3r%-2l3&%rFi9~b_Dod z#&i{tMPMtR$uu8hRT0VSQR~bjryCvvRMKaoRWfXuP2aO^3~XxGX#P?c$gg{CI@dFT zM0vkO&M`Cus2>6qb~Vj14buEN zHBZBn-zG3}&oxE^`f;fDcrjfALJVfsxP)zs_Q-lGr6hl&urIeH1M%jyLZv;NtrnCP zi1gLf&qB5wwGwZ-K$@W^NJ6Lh&P?}aRq@6|#@in7FS~_#Dy*UHn!7&nFdlT62|OWg z8X);2OF@+{J27N|0XIL25bw1*cf)cpnH^2~!N3FhiHw6%ETRX)y-ukQilju9pJWdbB!fNd0yzzf&b!U@ z(vriYzt}8pa>Vg?)bwmI8Euq_IjPMbXK={~C@Aw%<5+`6vmiQ?!cayG8=CYt93*eo zc!Nn8TBrF;P$DimHhs$Jpw8j7YseW)J;ju0Pk4k*m+qo-Oa8ier>=Rz0dhK%zQ{-- zH4_Giye{z?so@3_p1F07U)$-eXshPkwtei|MNg%bkL^nsTJkTQl&+uo)0ygLfck9H zRm5gwp&(1}=2dZzW7wSI1pYf4i>zKc^Q7pB6XfwzWTMb>H6Hh*I647 zL!^d@ZQzL{by3ejbY^bexP|+AZBJ2GvRES)Zm$>2_|(@#H@z^XcuN6hTp?HdU3iQ>VA>=kf8jK68{azZj9JJBHvQ4a`h`67IJ zHUKhT{TWS!3*7C|g9FTcM*D+8Ed4chCH>TNy9{0yoDqMpcS&7X*71}Q2d>EPBCnC) zc5wddfv{4haS&aDU|{!lC(=OWdS_S|G885C)rf3|DWy&}D{BkGR7PxrYLEq(I+VUX zkkmtge2S97vxTQQfg*HFVqdw^vpYCQh;7fV_Hc!vJ%!^4_q8Snls}&b7Zm$6pm%3v z3Sw;~B+}!;wasY;Jz`U;x(lMRLt?w@2au|p!Wj{W!r_|O%-Zk=P-LhQ3%-U2zu5oA zzzMf|5kgf+5fPBrUEc@f9{N_}$7V>HIp%%ee}{4- z@DH#Gz30YBx__Q}dy(4G8b3z_&!<&XfgjVA1i>G_*ng)S`Xv1g1^QfaeMD(q=PNz@ zwf?@Iuf?gvkf;fvIZV&^lILsP(kb87_SX26`&d91HwXP#e2?mfe3G@uL!iH6aNg2a zoq8*%9Ieup;jXea;_kk^qObnh8fFFw`?jI3HRhOEC#7Z}43rz~kb*rPTn!AK9=F$9 zy&*HeJ1$*BnC;vSdjR3rWn38>4L?z3%D0I-(}awxC7Y^d!ZgP!8Sv*Dh&X48>(oknA$FWeRghp&%n9WI5F#75>!?EA;a(2k*x_a&38`y^G3L?A;l<&kmT*rh*%E zrfS=NAEEmn68`mU?S|K!r`>Uj&kMfl|YJ{qk0^Ed7(b97oSr=Mo#z83XyU0rE zE9WH1LWq1uqENd8T@v#(#n4$ulq#{xa}}ydg1BzORxZ6{A5yu5ZeTlM-jN zyjZ&+L|LK$P>~=y=doz^axMAJXUk$ATpQFRlq!Cx8E?k;(J9vcLpA+{ZT;^J$_)vr z!(gd!Y4TamrbIWaW24IAORcMbn39*ei3|BeYV#{EAybP$vMmR(UU}tpO1$r>xWm+ zPj%+mcAG+4Ud;%a(&L&rCw*A|jpZmF+Mn|0{U z`hnpHWTDD#Z-zUSlXW^#3`rtM+Ue(-t6ZQRqn+jtgSU5)S0W;9g z!-+p!s*ih+7C29yTErg_)7jQL%O!O{USBKNeo(b)hAod43JtTv@Wm+&A#WZB#~4+L zDA4NdZ5wCFdbj0xgQz_E;tEvtc3jkp&xW&gR+Td~GFfAtm@vN3#>CUTOnuCRaYQ3j zpJ|#s*wjqDvpt(Vp>>iTQYg4`O&>(ef9q`26y7#~)gSq3hr`;KPcof^vhCp@k z)=i1vdg^Ip2k$~SeeE5`~}b10;g@WwcmuQ%ImdTv-q>W-y6 z>a6{IggtCLH52z*UxJS*R;)S6iUj<0&B!^0e+N~RX>Ouk;maGsvJ}d~XHtVxRkf!h zE&0#h{Wm%1ze>=c`e4Wp%Hcb2#zJyOe569$Ku*Aj8us#wW+x0o5t&*#u;qWCKp;ta z5z_uXG<{#w*lC4BpoK9;szzyTeB9r&d{mF3v$Gl)rc#N%WG;)aVjZJ2b#%EuE8OmG zYr)ZRVHFOW-5(qo4Qr?k%-7j1udthDoI+He znUy38N++bf)Og<*CzBAyZ33~l2Ysqxlm>eOIgUgd=&6fc z*HUcB99>rP1r-cC*gL?K&C;v(8ZWa)3~}tk9X8~b(FOK-{j(Ng^isRss1ki@%8D0h z6@oNVT#{gXBy(k*WIAL~ZW>4LvMT0&b`_FY>g;O^w#Mp#woW+9a7eN-{$P&v_J;8E zarD~f7g(c0h1*&DG_)MDQ&^XClEjnQeR7?RZ>v+j=1%muCSeGNw7K)1UW>EtL=jCT z~nQiEWzb3&wrLd zBE?TXnHcLW1vSJ@{d&((SV}htEUxhHYMF*cTw!)G@zv3ei_p~Sc228!H)nXuuTlIE zct)N-vw}R&boIqugRBf6PslLzI87e)%*w7N=#@meY*12YNwS$uULK^NFpq7Bb1*Ar zCi||d?R;-U_(WQ^pqNYp{qMNcfx$qMGbg?BV>j#Cwb=IK%12u=HR`5dtlCE;vK{;g zBuf}ltF48vRj&!In=>d1NWS~RGQO@a4=vCU(cxwNbxNfzVr?0t(WpFzAA9DgrE##5 z^Y(aSXC;b9vA#I3bS@+&zqmGtewE9fzbJ&QRCXR{1ed=>)J#-XoqQxmM)ZG45a(Lh z7BR_wp9%ye*D88WsC~&;5p6>bm;7NuIr1cr$+o`_%#!#hKd*{frpj2wC{4sx;Qge^ zZUc{{Vt(-j2yL{pODt9{D_E`{rbbk?{9>`LQ)~u5{5x*_+bWTbyvCBI=HyOr!q`CH zOg>_6l&J(B9QJ>FQX=2=xf(Ye(NP+QB;Um(Nwb;zD(DmE{$`vJexpe_oE9$Y?=$~A 
z5Eb?!pX4-7fncLurMAJ{$PnL1oTYfS3c3A?ckgMSW1fZMwyS@+Pfr~fPMc3PQnSU) zI);`U+{TOK}704n1`+unHU!KP}M_hRqk$-Urpv#y65=g?%s7 z%8mx)+l71FTHxPVu4`ujxoB$-UX5-}Jr6R7n93vMb_6Zyx`cqbDpQ0n@>w+=NJGuW z|I?GYoalig^Xv+n70oV9e?Mo0e9}nL#b) z7}`+4a=5EJR=4-ic0A?H;&)C~tU_4EM$6dTc$LWV3YeL9rn4`P6VXuCgl39!V$~Jz;R6vC^i|qjT<(VEt=lf}T)fus3 zMF$F#bWMZp5=Q#()dgzAxyMW%{1Wd?O}fLCF;SQW;5fPTaUVakCO0?S#pWP+R?Nr^ zCmUMTl1(ug`7I7%dD-L%Rz2qm*9Q=)XO2x(ioO-_N4XYPp+4VnuR6P|rEtnXIjGx;}7VDZ*7<8`s`5-r&pXjR| zIkPk7IPH(=YJ*PVM?+H=V6$aUB08aJ3{IsiIv}Z~xC+m(cKS;MewF)iobT-v#r}IJ z<=gz8&eB~yK~(35sKUU&YFZl^Sdcwl;kB0`uXt3GhK54@@~688IAtNnUV-@cx$pq0 zuK^)EfnvKGEh6(oj@#J{;W*Qs91;b8ANc>n-djginXiAKDvF{g3L+pPN_Pv=3Ifv7 zNJ^K4q_n7rw8W-ELAqO-K|)%(L%O^3KCizs=bp3foEhiOyVm^c+U}SAe&dNxJRQcp z&yNrH`d7cQs}S+gXnmZ@GIFh{HtL7lv_Nuaj^i5o{!B=eofffq^j4;`tN7PP^K({# zOT_trq;WS{hK000{LzOnX8!ChjJWy$zwfQ$bf3&cw0lz-m!BJ})J5$XohxU0QXip| z6op?up6k|TU9|u}K6PggTdwKz!V755BlkWWCNMguTwi)`-p$r>qZPj!m(aew83qj) zwM(ct+~6v|w(`IYbx z|55TmWEyS=O0g+%$Cx1i!a}W<+yad=hHrUMS}F@yqXUWE$bjO{8!>q1Yt-KM0t|Br zxBh1R9z{X^eXKtDty^Q(vi~kaalZK*2BGa(8B_>;1(7!+);;E3Y8TYCS zFs4ElxD2{-*r{mTKJh%eSI|#|nQygVn;O@-nO7e8Dr6-CPt?M*BtnP=xwwUJ_6n71l6`(L`(y^?rLRzy!S^MeU_sW&5 zj3D?R6sIXpLoPAT6kpBmBU76ys(H^Ldy~j4yz`Rei{(xVC@lLEHuO2RNaXZ!C20+o zTL^X%V?i61fjNj7nb+`+oY(p!d%wR?ZIGR}rOE&XC$gN_^zE%m7ON`rM`z}613Bfn zT}A@lK^NTmbC1X$FBqG;1ArkBJJsDk|c{lMl=SwewK&`Z*Nczf*7CJp-v^~YF0xz7D7 ztoC+!Ur?j@&AY4XALCMbWC_WhtgRJok%cF6sJb-abR(1f!igmBK7X#j_;N#Sz;b8J zXqd-^rsd(D*w?uo35eNvxlyyy)mV&AT8ZKvT4POUNb)v1UlcpkZJYFOYHVqlmisa> zMH_cDHUOe*&@T1VFE=JEFnrW(HO}*4bro~~OQ*^|T1oH&9$i<>~bppn^(K==s?QNKB zi*b4UAe&Qb3`d$r<|2;GqK4y=!b@VA{hN^hsl&{sS~l&* z*tZtgtS=ewy*bNtapIiTO=T3Nh&@X&gqBOUx#|Wz_%=TUX846B@n4oTdD8AUz;gWN zt=mXm0D6keWt$~b;jy)^?8X}y@`1f(_yL^z{1TT6c#yxvXmvcIa*EzO{KHGxxk|`V zgm!_P6k1I?bp#W-qOfhFI3B;Gt~*$%JgGs(6Jy?A7*YuF<9d_=r5N2uD`ZBL&C3M1cW`Aw_xlZMq_a$RY`NSs`A3!?5^;Mu2L!-u z)>w&2HDW`8;i^$BD- zgOOoVWlMZB-H+Vu4>R{Qikw;V0|$6wpfwZ^h}#Lu!f zvt8h&ieW25XE;y97{i{NGJy^UG7uUT|SAuV*8!a4+ z<s7X|c+=y2l)kDz3Q%&=B>z^hLA^2OJZOFV&PQi0 z1>v_uCi)Teu+I+6;9KL;?m1bjo5Q}~UUA_X;kupjj=6X1raZr$QKwaUx#2*g7d`oO zN-pe%7#R#SR7GM^hMeYPjp90sq1roo5BposW%Wx!dNa4)zE!npOb`29{eXGr%zSX< z4qGG+I!zhgV$zSJ6$s>;dub|J^Zv6#EtaN1wM8(os!wYwH;Y~UnppEp`GiVIVel#5 zdj-BWv5u*4-EO#c=Sq!^T78*~^X_6xm1G_v~K5oJeX?<0O*f9s(75@Xc&j~}aP{S^{Z zY7u%E3z2wR$og_Xq@NuUDyyk7qU^OKTGQkiB!*2J^UBU8&Y0Twyvkjt^S7{(Ks!Q+ z$DgUC=`+=Kp~p^VzFBkS#_1g!nfuW_ zZ6Ou0xiVS`CztbNdT_?&ZTyOt)(=q;^A@FrM)(-Kv2X{mJzg8U)I(m_7~ZW;BiLzF zqDjB9zRK3T-&z`Ly9bxktq`Xl?Q9uXbSEm{>I6qoAw1|jBg#Mj3O?Cw^Ty#0MfN-e0{dPD*L&NS~@sE-))c&4>tY^R3NEtRIy z#W6GH2|p?2m7uEjOX4U+eC13!Y^@gyF`U4-ptKJ!FbR;lW@;}p_-P?# zy0^MPCxa(8$+;b_hn~WeQI>DsNmKod-FA6pjX_ku^J@QBzdtdkO2CWfiqtd4(`Tf4 zx8%R9)e#DAD&vmdF;E6z+&C8H_3+js5L%38FPAL5E4G?>=YB&a+4dz7{`tCUgy}ba z$NaywJLi{g*0KhjuYWjowU+komCqitZuIHd)fcsZ8p60<4u?R>L@{nQ`!N)QD_4Hk z+1ymmt6;%%fBFMxfmHbEFb=fVMpei6-aCzx`;xlvJB#1l|9%sp4Si%nNSw;xczDn_ zknTfy>Ow?!6$zhbKYZEW zq|rm;k-8w?_6SS%p&q-}j@QC>n%t+i4GTfM%_Owu&|dgxB}wQYj$OZGp_gGV+EgpP ziu@K#f)^^3UrBC;?00_V#q&;&7vU*K^Qy@%vb+h7FJV9^=0bX&l(N%WtpV93=oI+q zK7T+$GdTY>mH39cy03D|C}C0(R(qO$AIxU`nwpl$btmC4bJjEGd3=|v;;s44=B|59 zGsdeu75k)bl2&;XxwVQl+o(5E=H?0$vWNY^ZW*P~S$}N@)bRX^;vZcwW^Wz? 
z^N5-U2SF}-XF8v4y+7fD(6X{^>ZG{lKCkBZJEz;cJR<<#j4%8kP6LB|FznH9 z*ysOKJn+Zm##KWBcqWd@EpE#*zccMsXLNupd5Jdji}u3m*`IuKw|6NIWBly_hS-L%dk$=hlTjx;7WU8yTVRECMr7PiEMfBu7!V(5tw`~kQG zg@2Jy{LyZz`vAL$xy(?P;4jwx5BekjJobNX!~ZT(|9Q#3_@e&V$$xOn{xAysv!#B0 zOO2hJK6UEk#NbeG==TYyi_sWmE^_53MeiRQex8fH7~VmqWK`nYi;@>3Ct<*od3S;1 ztwh7sfVZ0OW+RfSFNX(FZ}2oRZt&IWVFD?j~{Z+Z3>dUUrwg^J4m$%p>e3SU4IMf)G2_`m+N z|9*vT*PtY`c&kAn^XDRjKYZ#POtQg1BStD}L8T=Y7qWGJBLy7Gh{D*S%k{mr=sy1eGVGLX|t>4U!DlUerv*hqhr6AQp~ zh(-be?a`pA5LJd^z+m#Y5s^V45M;Ra4rlm@`z8%k&p>%+45HtXw3}&FtC(BuYl34j=EaBBK zbKQgg@Gj?%w>W`NH;@!VHP1Q&Rxi3Nz+c<6?_StqaXf#^;(;ZII>^D;!DRFZ#7b;w zK`tOWXTKsH>v8xe*WZ8t&}U+RXyTVmgWU!-C(tqXGsSKtqdp5f6p1Bw>7qa=>J-pM zQtldtzv;(hoHv8l$b>eh8*rLgIk&pxf)Ejj@fsj^)|Pby29H)=TstEd@amD>r>jp% z){vGm>IDLWH;7V32j7Cud&7%*qTU%ZfZe1k_PetvXf_TnzQR+pc$oX+9U%oGu+i7F zX%Qwg4bG5F<8f%DPTJgKH66&;g~r25X&u&|~G3r&&=Ab<`)1rk2bZ2>tEd@wG;qkwu^rGn+-Vc<9OQRf{=MP)7bMV?KNx3)?1 zZRhxJse?v3G6+#i^xw-M`vPQias+OSjRrW2S9Lghh2(d=_b0Y~eVmuordpk*c!PIju3!Rfhpiz|%0{&p&b=N9!4} zUmZVknoa{QO*m2W10OqCMc9tut*Tiu`5q9dnLdbY`p{HAFuhy`njK1Lk2`8+=M$9| zqQ?Nuc0Y3v3$--BFdXVg2(H9csqTpGIu|y1MBd_Vrd>I$%opZg_!e>rgd==_`9%~*!n>TwjV|hLO4%FJBM0Yjdr{81!91vRV5oGaJ4>;-^9@& z!Mq;6tv_NvBZ+kJ`TVbj4U{`CF}%RMU$JRwvha&=YU zsTFEJi>)Qt6ja4?og^im!wJ8G`3S#HXk8G$@0`#fzWA958Q~L{$0-d5-CU10&)9nx zdOmh{yo9Q&z>1yiDKygZGTxQP7nLuWHcW+T#{ATEg~re1D9GxqeGZZmtAHigE)IC6 zR{g@9OacL(j>ud8y4CUf&v-!W#*B+z0l-*;AQo?MGsu7&7W3LhcE%|9CzY~#)5yv` zoFG~(P(dE*j>+JN;#0ZRQ}V5Oo(>R|(;C~H>kI;Cm68kib%^uwsO2N7|j@GhYh{LwQl|Sq|N4 zsSf=5+ck?o9T6P?q$JeLkz4=tV_q{=VH56WJtJa$U#vE#1qqm&_e|eTjlLJzHQMVla0A{`0VaF=5qxM9Sy6 zBTdU{FF|S)j$=lg=m*m_1 z4eQGNa%e6FAplhaXqf}mVBb2g$?|Z3+Rn$3DuxDl$M@~O-Sr?#ig7T$A;K%a;tJA1 zF(l%)J_&d>yo*Ypu6;_I&@jt8>wG;t7Pdlvqkhf-Hs=!DO;0QG6l#O(H(Ql7owp=5 zan0;hDCkPSxq$Uk*ENw_0YlJCws+{rt5tv9(ygdUFYf*68TE~d#NNg}(X)wcj9Sx> zGK9M;rI|G*M+JOWe4U5oJS(_2fowQR?xlRW^(6iy>dHchUb<@n-#$v z`SVfho-ab3r@ZNlU{u2U7Pd}_aJ+HEWs3sa@9D7`m}}*3LJ!i#?&BeigBe~g%0D=% zKo3l&G$QS(^|4duhRw2nLw`C{ZD7>h4U81dvZ$k;?c&khI*wc`Qw%lT{J@dhj*9`q zX0pPLt|*{_8$?gS_ct-5mC12dz|C%Nf;J<~e`Jk>3H=vJ_WAIt4Yp(yv_2}{kl(-P zi%+xC47c)+jM){E7oX^wvJkzsXXDA}(Xw5CJ5M_=Dnn)JD%yW1E`L~^{i`w)!}zRS zAth0gtIUWyu-W0E=bEycSp?bJj_eCT=^5K$zZ8q4&0>UovLY9|FCWXj2U_DMEVWX2 zE&GRJ=hU2S7sG`_IHhX&63q5*e{c6k%)K_l`N~p0>W`(&#JajL;+kyj1$b3Eh%=U3 zj~2*3=yY6fpgb7TgoslvuiIhJ#xL!X^g4gmxmaB2sq=9(&~%)S6RMM9VhAf76FjVO z)A@>49C}O9V}iVb9T1v-Y%HG>mnO(H9p;R+oLzjo{VCRH#pS&6!V&wfG2Zv9qM5#&9MxAKw(nVROZ1- zNRfZe@u9GcC>jRv=|4om>&2r$-UKW2R&Yw7;@HqUey0~SoTo!W*IXuFl4Ft+-b- zUc^On*|E|l({hgZWY?pJ-6F>8G^<_8hipm;meeGH%1v%{W{avLB~W4}4+H8-N9&Ck z3XH%8?GP7QLNhyZN7CMTe=Lu6&D{fV@!1%$xbo|(e-XpbWTa;r!uXQtRc#V<47Lu` zqNS@1!?n=iBApT<9kcq_pThX7p`U5rOerE12)@cq9*q9x$7e$`Z+8!Bc@XU{_FHwL zx^fS4gUrZ{S{>%Rlw}SqIc^A#Uv+;-a{p2!|9s5RFz`{GAMS$4o_CBwJ%zC-4>GIm z;n1rTn}Ts%7;g?)mktpe4cU}joI5+z)ZVc>-A_@JXiIS3_M;dt6w^ipMEd&l6RY1eJhJ`R;=>V zh5jLTZKcxNx4qvMd@2X%rE;ZT{p35hY|LgGs;cFzC!#1Gg~`EyQJ}vL$b5PKnY1l% z9Q`&&4)Yd=E}=z+w6ANvmfv9INiCye!S0A~MwRQQUPJ>+kk+B{Jvm$X;>`87`-2;c zU$zHKl-8x^T0)DaV@UtI6Z2Yo`Gdlf|aolY4}$1O9G2@X7t7bNM0z$))Sq)$B@&0zL=vD&^WFwf^k?D&gZ=U zipcYvpjDri$K^g&ej7R!f)+&`?x|oo^q~VgVjLn-rU}UarYEmUfc3-dI6Bvpgw`gr zyIE1!VSTOAVv5n7`#L(!6>JHIZQI)y(F!`kJ3NMq;uSAQ6dATyJT}Dl5^T|sz{j7a z%xE=oX%L($4#Ssz{aX%OPMOx)<07}El(MV-an>}YbL>65I$>svd{vURV2m>3X83tf zTV$#67>;C*Barb}80LC@bKl;KDLC^ZvrFiDHx222xfclGCAY7#Eu*NHjw4i;%_~rEpig-X-U1eI>um51^V|QU$ zyO#)adkrvzW-^3|vV1Y5wd0Zyo7uK3inyS-43Yz$Z-m}C$B8e0lFA$cK2JG2T<2A)Aq}x<<>5`S67iZV&~f9;5ZtY^yIKb zKF<|pyP|Thi!g)lrgz%9*I8wKWWIbdFUFAosf=;6G>X>+_kKcW81K{%3zNYon{;-1 
z_Pm&K!=rd$z3`w<+lW!kUEzlp#toe{A5@zP>R$dJsK*C;JX{9NE6=1G=+SGRrZ?9wRpU$A7t?93tV&U+HU$3fL}Y(RX8?_~I5>bq=bec^SB_({HZgqKiGijUdtTduytr zRpO-?T3rRPj<2NLY*)iqp3v2&!ee1dg=_A)Mq>p#&`KNol3I}+Ajc&zlOA_FY zC(&1o$Lbr$#6+PeH?;C);)-ApF?k;|!||-r6IyI^7w8DJ9Z_?6W~kwNwN5J1rW&CN z!1<^!DZ~3KeXI^Gdfi($?6KXKeS1(vC|<8o>@7uJ<^xX@-N?+h`wLQG1w&@Pn$Iu;$NBlr626YC0)Fy0F>8I7er5_xgcr&7 zOnSRs0G~=G)4i4$>gFZ49V_tQ5MDBNG%+vZx8=KC{lHyR`^lR8fKoBG&TNr7|MBjG zv;6u-Dx8^%>DqS!N;n`3GY?KU`{q;ojX4$dUS&m2u_)ia(Gvf9!I}iKOOT8`2>*$| zEz>l^X~g42R4um#Tl`TimIgllV5_WvC)8DSWljE+Q5FJSm1tM)-zj4Cf-!iR|FXG7HC(|eEtk6;HJ43Y{XR8F(U7;kq zStraV+V*^fEODCdT|B0;!jXeCBC5>H6z|w}6x`i%?C5S+vJvDqH2Szv^AaW2Ku~x&Q8jg zu6ytU`}7V=uA>|4<>T-0ZugOTmHRP$=lL$?W)Ss4*t}ao$(OH*pIB>5yh}SSXHA3o ziGb-z=Av5KqPy_mi$P~sOr_<`okb^jy|E7SDTYyWLkj)_h06M$+;4d8b3nUovErzc zy;sOZ{1r7$TBR*y${TTZV0kN~^oh+?hgNIakMnPA6>2vrTyvEr!@L&u1stKrOo+qE z7p450AMwy{nC97Uwu@kur6bmqE1VQz%0L*hERCmvNbx513!?C`FI)O-IF%xJg|jC8 z(K(uR^FA-Q6p9$2o-7_!kfw-Z%5K`Vp~eIriz|WTPRVUno40NQ4vK>_@5bdaN)i_w zibC2J-^1RRhgV1Su%MjMb?Dif%jkh#g*)4}i#*q<25nq#*=UUc{Uv%$1-zc(d>~?a z7h2rNLvqm#@xhCZ@3^p(oS`8lett15vM<#ijS+R}ms?#DMrWci(8$?Y+g}rr)3k4Y zW`WddyUMwLqq+(PpVAtO45E7oFE4T)e*wS8NktJEd}T7J98~i9wULxlRv z>Wh5URiUf!;W5OattEayn}MI z5$_J^=If{*@Hahg=IZ_Rd-j3UM;xs&sBM|=Sbw-9P|d@t@YYR;q=3|)r=zUFUqy)N zy87%6(i9#f5Yl?qzFnn~!OW&YKv(xIi$S@@DU9h!o0n(nCXeZxr~mJ7e7QfDBr-Y& z#UhT-ZAX#V7p;qxs7fm#?74!^*VnNkmQkQOZ`PVOm$+Ie#(8ycgduoR$YkMBvlP%ApM3UOlWNYPJ#chI zI;--HI`=yi=$3q83d2;{JpLvB`8%d|OD&*F7z}!4)~8A5|7{e31FTF2Pv#BVHtHqW z`)l~wx&8Ny_N4R*?6NjUw8nrr#{W!d+q>`z@r4V2tG@_u!5oB}$rf7R>A3u6A50uM zjoZx0>CO+3FkhEEghg835a+k1&&M;%QYOR9$Nf0zuZX`%i|qooJOrysZ3N<^GD`S( zMI~!wurf$W}M>eedcD#}sjBUVrwEt(t6mnZqW z2cytWJ&lL0us<`$vmK)|_nYFD{Ov18-@)YNiJFdW8`v(oWxx0v-RPHRGH}gdl&PNl zmpjaFFlh57^#VS8H9tywRa+>9dCoPr2D)S!x2h}IVH2{RP@ zFzX~s&&UJ`%>LLjWHH>1fPA0?QL%b_=(U&DIh`iChb=4^b~yZ zedIpH03GcQ@j~9Q=ZMS|+;GG$aBs!~zpTTG=wm*Ib=Xsh@lN=##_{OxfS1n7MxH2v`lRr& z(lxf7b9atp0QmudB4!PLvvdxUkIKdzI_rWi5=oLRSE}y!LN+Ig8(~b`3b25Tj zE{ptfr}fUAGa2q~SYWyG%Hp?6BHARkTs7l-aFx_@?4Tz%ZLSO05%KB35&E zcGLbulFEj*)VppUn^gTeDqWn@4)s`wlm2K*}hTb><)Of8|zs)7Z z#_(?dR7d=UII|+MBE6<(Hq69VqQy6{z0?FM7k7Z08&sJFY4=A96i*ljln0r1msm1r5%jfOf~dfXCA}GhD;=RCxE=u4Q#NTLtZ>aQi2J>MnYLDYW60O_ zl8)i*{28HUg(kyG{)KlLFTUxJRF_l2r#dMhDoG$DB0tl8u~@}jaiwE4XgJJsin}{O zWlwJHLZ^4fF7zpJHoQ!Jef^%jg0{v3(*S(0EeZU)cl|u?k&Wdn*aqX%s2w#4*aMIw zq9n{(ge~^?zM_7^uWgP0e*t|#&Brb@4`Ml{lu?u(Tvor}c z<887+7fbesq3sgl zyT~#VWfiiYQWlUI7CHpB0O5q?9PZ|RoYUWisgDt4DljuO^sw?|=Oj@6d05#D?@Ggp z70VjN=QK(R8Xb9!&m+E~NqO8LX|&MyBq|3Qe;J1|+Jq(L5iS5TZhZ5Ga^8Id!z|4vTR$ zmChATJXKG$RI<^G-X~Xo$lP?$=s1_^(*PqOqi3CykLCFy0Wqy7ncci-Z z;LL*8ZgtfAo4?$i{P&4}CJX!)<&AqrLWS1i^%0rlJp~uXk^8=~p?lygV$@8D7;l-{ zF`qwAWc_5lAj_HF*4yOV*>8mBo*ApX)4Hsz{8v6(|9%)?QdEd>@m0ygItkX6>y_c4%}e4DCyD3=rx226A1L3 zoMy2pM+E5Yn3^|55%YM`2XYKl^57R5yvO>_7xkYj{6`eFbo|FR|LQY;@O?hlPb*^H zO6e8V{qNS)Felb1eS zL;UNt-vMkUnh9{zB-=~qVPWp`#bODkIoFTqyAag|Mh16>q7YF#Qxgh{pZB~+O7Q0 zRs2^M)6e7gPmKMwWA{&t{k2;hNuHjaryc`)X-OcV<(!Vy2UunDGI-Hc*)Wtn8bipu zn>$`gWh4l}vx?=-1s-6nHz2g=GH`;IK#0Bsn_UfSVFoew6!yn^GeJEl8pe!iY4IG~UmoxE-Wq?I6h3{oo*dZ6 z9#y~~33#d$E9y-7Z}FF$|0&axl$};jW|%Q|+Shy3_2dXmrYCAxb#%YSZ9(l-RWkPX zsFZmp)@ED+&m4XhM(%AN`!h&-ehr^MYYZVJdN!w)_1Kk75D5|%AiQiCrx2+-G7Obs z16WFJ3uL*jz9A-agf?N-5z>-wKxkSP&T(ppXGI_~?&qn+4H!N#i8K%26t?jlkRjH@ z+m0Z6mq4ad9BM=dK|krPJ@gz{DT(*Ki3Eo0M^M`xN3S?hazaZlZ?s&4v9eV(5Xw2; z24JN{d{PEZX&^#PWu*f!!yLgo$u;*Hl>(vjP{`^k@>_fxAS(?BR5Pi*;-CbRIEyfS zWuq;%lMU$UgQp5YH2m{7z*Uu#ae=JW9@^~ef)i<$%HvT0vCty?VT0K`%8jp&SAzDI zB+Ih>ZOWAulSTpG3O~gpJOy8FwGMu0G`4A*1ECe036sDnzNr!l6>Fs!bPmg#v!JKM 
z4s)o4#EwiYt>l}&$L=9lN6+a=zBaAf)fA!FI9fF-yI zkRhxAq?NM$qT4YX3c-WSHJ#@dx}rmgBA<-W!hUt5esCxH0|5qjHI)fB=8C!UAI(X7 z)a@bU_hqGlQ&a_&?ZSjpw$AvnTF+8S*DAubkC(c;b%PWL?}Cx!jye293WHn4OU#P&=P6>GYzrMa9)B3U^C?gzo-PC z_Pl(J1lMP`>bP3^tV;N3B;Due4$DD0veV)3P~u`8>x#@+TLH{=kv^ve+xds6JE`;b zp+92209~i1t$wVpnU=KbwZq(LPJFj*yxl9MG)+=bZ79t#eC0B@XEruNT$nh}rNbgL z{6sWN1WMA%*k$&;CGoB264U9srmPZEHI1cJ2Gx!1QX?BW(4(7IQbZ5zS?cZR29G7@ zYuA!hOH%PDt+))aw=)}O5G>iglucdtljsu*)^}&wX7rr7iH>%2UG2Ws8P+kqPtYI+ z=PBnOJ1@YaH^EGY!#PTXw>x2SKS_>^xHqqS ziB86&JG80epb$`FA9dQO-eC&gl|CC{TK&XMqp&5<;u4gQ&uDNS3zM(+h!&wg?$2qg z(TtSZ7Pv$oe2*frglQES9ygMV0CebqHxboMzdJ%*pD6@5sgAzji?KbZTJF@8T>j2J zRWsHZsYwvh8*#w1&rmOiALjBSa%O8GtI}mt%kH{hhoi_H@iI}R%*Iar{Q_Ojmi90A zLbkMrxsG5^Tx-9yt=5<>^#nqs^(pRnsK6RxlxU0}(dT`6S5?hvlv+I{mvT*`mty$l zXM_ht0O=+1WG+Csb-BQAQ6<v!l7GGM`v%OyL)B=dJb< zj2iCLY-e`T=Wp!X>4Dy{D&uf?cG9u;ahbBU4bPMM6Sn7c207)g6! zY+{mndEZ~JT)9YUMF2m{3@5Tcvy$5F@4pkZAOJU%}=i^R>1h1Mo1Bjrc8p;UQr=?p0)ro4f3 zneV-!#C0#X0mW*H+06u@=G{80#bfz0TL3O6hw{}2l*4jve!uArE5c8ygO)F*5!rr= zvU8(O9RCZL1Wnr?Az})NZ*}+5KkpE{aYYtGp}g-ivKY$uP(BGv7A&Y2k`(OqG%fPZ zb~<`(=J$AR(8ei8)4JTW?hdJSaU5d-FF@aY?b2_z0GO~Zr&)VO#2P*ylA_4hYwxc< zD1_g4uU*5xtcSPLb6F>-@cSeG&PSM<@K!zXi%PU2Buu}Tyoh|f9ho0XDZ9JyPA`4n zxXasSv}Q6-dXHysOp&S>oHvO9iHv(G zks1u6E>_+9#P)73rV7B~aI2p4$zf}xOawC%g0EwL_X5zX_rD?i4ieArQB@L_5$p^U z<>7!;1#YJKkePlUMWVq-QkS|I# zi*2gVToLU+e;k;J9s1d~67r85qI|1vSY(jz6$M&1Wh6(k;~`)^uml8;MY?y68*K%dEkbE%Fv56#ZDm#V{f>DF0#9 z=b*p_M7cy)JQ>&kKq%_7W-I2GEj-fuX0z2#IfY$EAcUtBNp221ib65 z)^{i!3u8{es7&_)0QVQ%$-f@sD~LS98Vm~S;V-lnwje#-0vV?4T9vkr~$^tgndq<)?3*{gkpgg z6@rjym``tGZoIMY_#E{lq7Tx5!2GXa?pu&0X4-RDl!0P3Fy4=grt}!WaGrt{T&@T4DvuYDK9(lZjUCyVZD zJPvM?ultESb(~(C?9Otrp-)FxPJu;bQ}p4H*4}P(YI?V+)ZomkSpuMIFf~sI!ku)9 z{c*g?u;g@4f^O2e-uL|(fKqUaDw#?m!-y82(lFLI*WxNkbSz<_N2QfX*AgI-x){_s zv>V4ZKLGG++MP3yaZtFe!0Eb>QA9yQE5A(Bc05>lq`Hm_t7(88p*DF`wN)}#Znl@s z{aHeFgIJ5r&!i zdp*jejCo7ict-kV3qs!B{PWJql9X~?H`ccoV$v4nkK!CeG4faMc+*!%Tpt9bOjZVW63)mI`ATuxa%ECv+>O(hA+<)kCW3e4$_Pn=>e|_S3>I0 zz>BH}WeR;AODDN}VP?sUDK5d`4Aq$~y@JW}d z0%buz|K=wwy{)g=%zo^1!}t4g%rxYpKiifRxZZ1x-+nt#F<*ZpJQ1=e`wUUnHMmh7 zd-xTix^vA74gqv-R``JhHO(MnS ztisQlBlATD3f&_T!`YXlUH{L%EeoFFSAOnt3uL+m+daKb9D27O~^`l__~7+n!LfD3xez;{&m#Vu@0$2*vOkB_ZMEqiDlQq@tpn zyK?4fcGvA(=GVCv?2=jPz6ZS-olWDiQ!#av_io&gV>|xHo;uB;*EO2IPh0xrN9?{! 
zYN=jVQelPBxg^#av7oif=h9oEga+U5u}HZXnLV*$V3`+mpYU$H#BPgvy5Cz#(7)ZQ zmNY;|6U&xK;1{rO(O+Eml3rt;d*GHm-iDg#z_IDH9=+MEpFq_|a<_adcI>)}@85Gg zMJe|;ve0BdJI@$V=7xmYC}B#_){GzxmkNV@CMqeFhlwT#J!ZO1;=IQ}nKXCK|JAhS zeOS&*+CN4S>0QPF=y60NhJ1?)blQ7wj)z<%yg2NsO7oWBkBmmf=s>y)}KeQD-Wr|fB6vr+!uR@~4q87Ey4 z+TT8pw8E$3aiwW+G3Re|oZ8zmQ%(yvO(&g+`~Gyh)SE5uH*~^BU(>BK*ITNuClXZ3`t)P?H+l$Z`3qB( zhh4n3VQ>RwKCL*2p&WBLHe(ceG&U8upNrd>ab>64zW18DI6hu!1Af_9wBAh0RXV%K zD-SS-iY_@6?jpvGJl^j z(0N`jpRPRJQdOR+7qyqQElZQ=S@&?|(ct%&@s^!KiLcRI(^k}$@bi+awWfjUYVR_< z97@#r@V-?9@nB;3S~}jw<402RST`>fgzCRdDbXIw&NdXMzf?LfqaYbwG8I~RsCSI% zsQ9u=kxhsq`m?l8_GjB~pq(K0tsDs*P-C7LG`eeYkWmpm zKm?!fTRH*{MNhuL!|yUjXU(0snZUkX_32j2tD?ZvpL84Tt z%UmW_a4M}B!+%_(r`05Kf`^BQ#JhR~r1r3u9O^Mp& zgca(eJ33=7eIBc(u2VZu zxBt3t-Sy_rL0fy}U|XG?J(|E5)EF3MB$pKF+H*yAlm(X@?;UA0CL5*w7kV>25nr1c z3AyEwEDEjAgPd7LMbkI)dy)%6n%f*pjD_^ExW^Pnwd_-m@RkhROHEzdd&%c%Q`?k- zx8)w9Zuc8d$hx-ICp=$NyJjd`^evj0^oi!sz?HkKsB2oPsc8;5k`CsBH?Yy#ilriq zI{hx~g&Z-^#Nyg+&FGk+{NIHq+Gj{cj!#n5_=WkIPc+XQ#KtI zIJzuLtA3Ma>(q4;e6hL|P-1n%lwg;ob*4XV!}`-3H<8xa!b;nTyj3Ibc^YAE>##`q z%_z>b2|`;&9>wq(@U!!D=y zS<+0_hVsOK_dpH3x-;2eNV7CkR0XA@*2kgRn>q7gj%y-)b5cqS-zJg`4F4a> z-a0DEw*B{3@kS946(j`|6eNcRNksucKmo}igrQ*=iJ?Q3P=-<@WayF}x(Ae0YUobs z?jGPh$NPEqyWh3;TKo6@WuU})T}L11=gTK`>QMVVG>xJ<@#8w_v)RD0HppotQg%-0 zR6wLWXTd5-4~kNJV@>&1?gMF1v6O-Wd6EsEiqBbscS|tReOC2DihG$~+EVm^*IDyr zLExq=(h8J*qy^Y1uP#VTQ&Y%X&G^lSY`xE0Dn=KbLQkHP>_2MBOH~rwOeQIsxDuC9-jQOX}2vkE?L}Z;vv&g>-|b-VWdRi!QkHYdJ@f} zUXhN&(D1`-&v7<`+DVzw36B_aPYbm^GPxckDb_q(nPtr>1R-WHYEN*<}EB|o(12Nx zmXu5@nX53{dRuBoE0b338!0xLy&mXo-Ani9WMC2AO+z$yWQM6IV}3h~ies$|ca};s zC62e6RMt zM}AVWS!xyemNedD@Q|$U!iV8ON7I)`zY&erYLTRjoi0}w2WYt=*=aAr$cQ|r7kk16 zWPRR2Pc#*{m>ZMYT>)1mPbS@HPGhpO3;9VXJ4W0rPO3Mw)q|6(2eP}fEqBmYyD2{x zl1_xJPVu$IdhCo2s@xJd*|qcjonL3S5c`o-1uY~vv1dX{bCUN{1;U~(9&@v~eDroe zhP>H(EMku&J2o3+pF}%G3VY1=E!33pf$@9G@A`g78yb1u97S~`m{BEP4b^dCtxN*?#9!T~iaRrnKw=)<9P z{|FxMsj(Bx+iW`_m+Zuze%tz_1+UZ$;{z*=J@u+0FD?1yv4p%-??CxAI-@e(qik=q z5BoLrv1sxV3pUXE`0#7U&CENSTN>zR8wSjQy3)O;DLf5WXgqW*u%E|ZR=BBB(TAUk zID2h&GO?+loWH@Lq5O38Dp6CLOpELBSwg-h9cxZ6^)~KxRjwf0{K?2N#4RGfKDwM@ z4&&`HBX^lvJA^Kif-aPj5%7p7tehvxR7EM^?^QO&kF%~}nw_ZF8DQJ>k0VJ2w6@H? 
zYzG^Z&wxwb7GcX0Wz%*{!X(vW&f`Y8y~J=0NZDWS*OYJNQ>gN&TkfP1b(TV-dnm5& z_RLMrP2Nr6=n)mMYRQ>Nzq{H-jp8_qi+AvkRmz&C##s#xS zsf#`esQ}nglEGKpwYpZ%6^X-rsF;GWeJKoGGr-m{v~#K>JN}4tC-rxsN_;|ob5yyE zW*HUTITQZaAAaBZJo2ul@gMWP3%egE zm&(MQxg@OL>v<3D^zz(3S#R-vPGt|xDi_s2Xe>X6WuaN@p0FYLN4 zTuEyjcgY0K!t|LkUmsh?LuKg>OF0t3pa|yN9uJD%;2!^;Y8Q)jB%#B!gTF<&;8yLt zKJJzY>35u1Fex`)nV_l>@DMnApoh>?mTAE1=!{@&||%TFm8ny`!_jlRq(B zWMn@*)U0*t0-X1tWAIxVOG;b@?x_?E+*l5SmS;bTHrkYmF(s=Z6*hC}GBhnQeF$ZWnuGBU)*k1UmrZBvx-E{yZ z?X3q_+N<35!`1Ced!BSvuN;q2LEBS#aLu!IOo$_~dcFLsKcvQD?(?5)&s6b&;8w>rpQ=PVBkS_*)Mdd)l|sqfvM;6D zv(i!K*ov39!P&Q#eOEN=9w_@v?N)mX{3zZ~e}CC7`)_g9SVI+vaRWru4`&@##@pt8_Kuj@+1dZ?pn7z zFT633sJ#RM+B|!Ko^|?5V?EzU9@QVLkDg5ree!SFMs25y?*EL~{`hI_fvZU9fcc=3 zvonwFircx9;dh?CveVp!qrwZ6o1eZx&g;*@$Bs&}DmXfhYQvZ(Z#eEn)L22kqm4v< zntML*BX5)PigD~yQ}`B9J40BWu45os&yL}(Sb3z#cGb0ChFgK36%sR?YQOn#Xf6fcm(a%poruQ`jK~7agZ9dYedsxvY zb#Roe7;m?Buaf-apKB-tI*WpVzElIw+p!>6s<7%l+`^yiGjmk^JIGhMMzc3(tc31> z#$s|{#U!+|1vXT9Z03DO_EcmqzAsecG_xeFPRGY4W^%h_>_w6YU9i8)&|v@~?3c=r zJOo|582jN^hT5YP3d23X2r^aEyyO!v_u+bT|rIorE>j*bZ7*rQjf7|0!^ZhtxJGL`o0%;>yY{W`v}}I+%7=BEz6dv z{cz{{P%8yeDHz3;BipMP+Z}W}S&hEXqNPXKQU;&5-7wZd>@Rx~J61^YD4n0nN7myw z55n59!E!N~4ci{7wYJ;rQY382OfQpko8e8@<%**`9#h$wa{Dk9>DWxYITbNq>SN70 zm;T)e&7;-c8e*aQjU_JM#f~5!ybebPmd+}XJFyrSPpSEa1$LFwz0I2Sq`K|0^yn1v zmqfWCzw z7X_$TU0n%v~wr8+~tRnFlk$mv8a`Mu4fzW8fQALkCp?Z z#OXVYNsJn!-k|Hf=egJv&CcNBPv~rERw`npzV(?@q3(^vDHHpxNK4@| zTy>uczM|o7l^_wUGKFR}!L? zP6}tu@8w<=wce#=MU29ev;HZX33J*A?O_9p?kLW)ut-MKPQeyAF6VcgT=ew8Oena} zJk!5lPGNmtdQVu$NJ+?BF)bx`d<(kH&2|ZR!=+J2;lL@=kF*e$I#rwrv_X3V& zY||eimG5WwrJ?AN$2+54{zNPAiAuJ89(^Sn{uzb^<5&Uv543^r<&M%nvG3GVn92sR`RA*2nU zJ(wfJY9GVa%$>Jlxux;lGNzl;qS=0EwuB;D%kg8R;7I-`o>ZczHo~AJC9ra@-U9W3 z*{>_weIhf}a!bpLMo{D@s5viGc9_15-<>c!T3w+-Vg980eves7QAI=ydbaogu81Z= z&s}`ApuxnosH%0pcKo*vSLt*D^!aMOA}+v>V1m3lry65k>H2IhB;0Jm>+A zLdoM^{Bjof@oSn{oG0_MRJ#H#mJzy8TeQ=QH;y^ZYL>Z1`6c|hVYjRHKtaG!3r{Wc zj3-H`GG-n?sOfW07}v?!t|pAQ7K)_N9{+SZ?`%wN+cjjzxXq6=3!8h6!PXEckKU@L z1YJ?gx9Uxu^twfV-&%}WJqP4npVC8W1tXW^vO1EZ`U0I(jr(M~r?2v+tZ-Y*y0Odz zuH2&6S+y!O6SZF~HH3n2#|3?8rOhj%7Bqa8o@ zCxhW6vqcY%WYA5jYA}wasqSp$JA!g*@CxUEOel%j=q}fCBaCH;5n}wtR=?KPyP!=R z%i#i{EiEgS$;N0|B-7+kK}6>RN0K9&bYcQS5QqEEBOal6-GI(vsP?E2fV;N#`i01G z{A~)R zyd6t3J@hP)-od;?AYphf$I(hwcJNe5TzYKYd`QhmiJA_6@y&XJ?>vhr2-J3w^Fogw z*4=mD799A!$92i|xy@-LbKK_Q-`isOnIek^2>IC?$z9kEdJ;EAs#$I=~%xjdT4mmg*1YF^O-(EsKAB z1Xvs{6rG`bSMyY^G<_ z^NwXl$JEF>C}kR?dv8q!-_Zij(l}|7<@qJQn5{kA?iM0L+K2YGnyGm;YmB$k4+QGC z>xA=Yu#+mwP8;5U?jVN@Gg2wHZA+;#i~D1{p-j0i*W8+=L+A156Z^b3;o5g44X4ps ze1K8!eAJlY_$QS|!sjcXDVtAYP;#PL`cLSHPcw8wK4Mj{zT_rxHs;NStntA9FG6Eh z&U+a~_oCSO^-!tF2AEXqLY>%TI+*>_v$G^PMl%d|=RW(Fws!lN&H;WAoVhMLaZ*5A zS?G;dJqf$1SD`U(z}a;ov%}ghXgj-{Il<#|>=8_%v>3D+jSq5)BKM%V26X#SPlm2~ ziB(#=-xFr;n25pb68@&~k<8Zf~+U->Ya|VHVrR>RQA8OhoYt(4>$Vo@M{KWWZ zETe8Q(WpG`>WpFe>6oTZH>&3T)huh!b1hWG>QL2jB2i<8;vN?Fd$c3P)++K8zM&hKIsjR?#j1^PWV@U?3Qx|-#73M) z{MKQj6RDkE2~6Q1OYwd&D7k!L=A`Y@pWUo4XEI=F(w{YJMmg3-}V_Pufa=R_e0Cs`+v! 
zF3Jl>3n$Ihpd03WL2ILd!RBG+XC2eN=SA{?%(EdRM5i>=)~~-6!jb(`Ag0P#XOekX zR2kiu?0)=Vohwd>h+ZexM96ogj;hRcL~SFyt~QszYqTw&@& zpgrF?5;7K40r~}3+7h%If00(;FaH31l`qxg?6NgR)aBXaT~%{_TKEz%%cB0^^o!{W zI_yKMOGEcDZU-uzIkV+4b$T7We{vKjS!S+SH9)38e700?t6%I5uFt;6pP0~*kdE{~ zR54xKsjoPO^fkbJo>uEw^(LgJ`WSY*#pHXjS6glx>#&c0N7UU$(@asrCDKE`U&ioz z2Sse_e16xylZl?F=1#nQ9j7*Zy{S_=*Pw4nWhb_N`pR32622}{TTPdi;2yv9m6YG^ z5HfuL{xO+&hB^8jNuW_%e_u^W?`7LdIViV^UkAgdJLH@q{A`VC?wo?&Nkxk)GDt~e~R zm-5HPIM?xO(ok@uf`gH1Etpd-;JRnCM7%u56HXY~* zj|calX=&gc&~lzWxs&1gjMNcyt$x>Xg;_J|YLHg^zz`~(tCX(YNevX~T3TGID#`}a ziqT`}0Bp{g$+$?SzxKxKrAJz90fRx!JG=Rw6A_7Kr%k-A7B(kBid@pU$hJ>>1_N*z zmwuyN77SsiO{O)y@u|ls7Yd=4%TyX?DW7>H-^GS4hm*S5{ZCB#y!L)RKTMV(a;M?= z!o0ZvvzMoe?Q3>vP;}#M1=lB$@rLZ7BUwL20_jOV!VgwjG6M1(a<+J&KQ+&+SFxQ6 zd&qieE$5Jyf~^qVQC9ejx&p0O0^CQ&C2xGf6ps%cvbiEb$H0EIS6_QaKWZbpOs>V+ zPz1&Ox7vQ3B;xz$=JAMO2!;rq)LTfZ0SwzCium(uItqaMr7|YZc$?E4Q%89eTaU2s z&Zg5;MhDc9e~HE-^5{*BdrpLUwP+X1b9fkbix(mpPyL_~n0F=OQ~RTc$(;y0Y6}(p zW-TD#*YoVc^gsGalgmzl@l+6Q9c^i9aB%K(3!MvAj;;yz|Blp*2uZv;Q)^uCg9wVvZl^iztaOn55^diIu)iezK3 zQC`<~y5YTu&}G}EXK%G8qJc7|Las(e0U4L5r+Y}=m)j@>*E|%6!w?=gfwVZxx{Hj9 zyeIdEmzi2c$2#(H-VKZO_Lqz>W$a1S(s(pwgV@PJV`pi4dZNy2*Hb0VWY8ww&2UpO z&JuW+v&nGIhd*-}|Gi0Iw;hyQqA!uOtqmuK_b06R`Qm%J8qV~^HG6{cB*Ooj1>}j@z$BY7en5z z-JPe#NjQWVOQBAT6Au+J6KKs#)k&e3S6_WK_bw%=MqG)?OR$-P4I#FmKXMGMVIvs_ zRcHz_7_*~0gr7VEOMb`Y`8Q7?&FAPsoG1aBn{JC zrmoBqkmu9lA}&+hfRD;1Xh#s*V+)CSJ5_Uzixb|UKGsxRF>}jn15?P(`ah$yLm8?E zxMJswZS2UgH)@)5CTRlm+g)1M&(~mJ2{{>g{xQS$wxUeFzL040SuiuEe<-c>=Wp{| ztfZ}HYTN#Es61e=&5ZAt^Yfj&`rQ-7w6BHXm%*F?Bt=7qYrgP; zp_i8r#mJ!iT|UB@*Bu#VqOVZMIU!H`wcBGK7hdE_=OKUR*Ss!z^YP)6UjzvKuK3BC zgqd;NNR8w9ZRLkkMei(oaNDmfTi6h7#G@f9Vn5B#0$bST{Ft43vqKHrbxL}oqz1qD zf@ap(i-+m`5%|PO`k$-*3Jklqq5jFCg$HU{Ovjk=Q!Pz*Rj}-GO5U5&^po_iXRMlc zQGtyKKh=*`?8ea7bS{WG?`Y6KeOfAV-}BMj28F~%b_9I1j0<5YFo9vpSW8@{CC^mc zHMOUlEQ0@vO*;VlG~@$^*NgjCt`!e86Ea_P&Aj$-zjX-zOjA+eiF4k%=^@}fMlM9B z|0 z=hjXRBzlgWB+b)9_I(0dMn2!GOP3fvQnhh1YaYH8&Vtr>_~c*o^om@oTZMRFCh|IY zc*q#U4%U~Wa0dM-X)leQ9*ALLHygsRm*}=vF6K{qO{vu>2|mz zL1Ws8zp*qP;(B{ev5&X3!rvJmCCk}REr9Ixh^S%XFT%bgsY3hK-4f@F&0Rh4>M6k3;ELVza$j0G!&<89 zBM3HG9rh9!Bd|(sus8L$e^m;N++PmuDB)nod-PQLr&^y>f}l$IIl5lrCb+1vEaxv} zo(;;{!Sr7_U`I;?3w^|tvvRw+2F4|JeM?R))UGiHE@Qsazg7z{deoo?d9>mN(u{)jQb zeO|D~a`m*PUqg*di|}U4ao}5I%HAi{kIxb6-MYQWwqtR*8HQ^z{{jc9fY4aMJF|XO z=l06l)pGgwTH2g~TGLq$cg)M%6Sr#08?H;tZtbO185>*oZXV=#-d2fQv9a@@@D|9* zzz)o8j~FA zB^2$sEoA!^lv0dC-`k;~0{TJk07ab8p3mqNQ&OdOduFnljheeXHk{5pc3yaA@MW3q z$SByIU2~pRgW99L$ea844KGtef2t5I4?OA;tj8>jq|t4azHMnFzqLO+s9Cho8$796 z_}poVeXCVmL}WO_qLKKbiDc=rpZ_ViE{4+Ozz%4#Lq}Qk#7gGAp1DJmJG`=T-Yg}&nB}xcN znCzYO27#F!6b;r@)Ue&~>pJ?Yy?X#wkP)_5l=zI}Y zmD`nw2=nG*L++rkx^;hdfC&w#>vyqhU(NjaldV~XK-9cNHgq!JxMweC(3@sQw@>E9 z4YQJM$!;DZ2bO%+&y(mSjp#*ef&R1|m}rUT@=$w|O^?c2FSG=|qyTpoXgCbOo;^S0 z8paKj=^~nIx}Yx(s6topmJH4~70T4)tq8brT@xy%FLZt6waSLdcI-%06eNfZiudTg zVwKE)AYPUQU7j!HyR%=d6G$_PkJ$dznrc&8mNgPw)mNQk(v> z6bw=JesOg+jTgGu+NTh;U8zDCpx>u~S4oGBLB=;7TCWjJH+@rrmTrxTCA>Q2hVrJthc{FJ$*4Je>3een4Pk8Sq{z&fLH`nRhW!L+ zgt&HQ+TOfilgP>VBJMUlbWP=;|J>?JbK_`FP^g(B)$~p^9`1%5I+WLTq&D!w=JUOk z96giHB`oeooeAN1QU#vk*Qz>CX_caP^i@XAVEmBLrjwSxLP{E+0YKz^`MLzB=07s? 
zk>@O~Lw;4~*T8I_B^9H<3+@u6jLtZZqY8H)IQNri{!^jSFo}SFzaD$&n(Aw86KwsX z*Jr#>i#8L|*NzZgN7YJeyY@{D_8jrgu*qzU~g9a{)SF6ukx#Qk26yPzW zDy<~xnsvu9=?{e0+mz8Y=4T?eiQVl3dOX#RtAX-~yRqIr<1OrmuEVvAuhCCIm-Y}I z8#!6;Q1A0I_h70w*huc)KyuWtRu^j%XDS+I;yicV{=>Y!k}s;X&KqIG>-D}L^2LX} zzj@ZByj(Rn;)_&iKNKi=z6!RuJnGOu3zDWZ}wpd=w~-01ZJ+NHQl8`c=!h3dpyWgdIeO^bR?0%pX5#z z?$SH)2pEq<`Lw1QGd*;GY?b?`+g3l>s8xxj!?~+Pq_kd1SnhLK`or=^T9g`?%*8NeyDQdfAZjjjkP?}-kS4um zIuuD;2O)9;m&LQ%3*I|F&9oi=5_yTIN{2BE-R5FxA%`e6|g|EX?~KDiU+oBj%OpSM;aB6&0ou0tE3iM8s? zv7P4`S<5I z=L%-5oMwCEOMe0@RfYDsRc8Z*8!po$;BMNoSjVcC3Z`hxe5?*7j&+_ro15BhgWx73 znlG40*enrwcm&czu7*I`gwO7T{`TKTd3$VCOJRPk>ll_>P%um`T6d^E-`3g7(hcge)* z_{7@Xf@@lTqY5fzT8~ei@5>guelF^#+O5w*Dhs!TiHZWr}Dt`_? zK`SvZylE9%wNN<5DaAoGLVd)ckL@QfKt)Sqs#i@D=_-l(^z=8 zI0wH!AC$ui7Jqli+E$-y0rhvxseirgqn!^o=MfPO3Ug;N|Ph<;0DQg%f;d_Hr z_Q@C{1MX{S4d)$XOJa(o%0leX?rJzYjqhz((N-?+OL}G@Y}melzQkn#M9D}AB6WFe zJEq6M6-SEol%^EKismV&A7qpTEbGFu`yEU&ROl?cN7XJdk!Nsr9M@zhN~-Wt(UnH8 zF~%D4^FId*2TN&UHo~{ljb3aN-o~!EohM=ieX9}KXXhfGzar1+SLUDOoM~f%*g|81*yy6eCl52!N$D~9v#FL}^^O=WKlazZQCwA26G=g5CTkgPwd+HdFdACB?yf{Huh07W#_X$i z#@(0SbRJ?(m%h_NQ=7`%ocYOf;56%Z%>%8_Y+MQNQ!f`_yt(`1Cw{K_9k5jim!{*~BAy7|YC6hO3@k{*sSzq#Tix8pp9`mLIo`KR$*g_gYdI z)bzq)`<}oWym|}M6^(4fjcgnEZgBs2w(J8)j24atPWM91q89W|jA>(1NYj^b?mTzL zKF+eT=ZzMB%Ha?qyZm&(A$)7|owT@QnO%f<#tYq8+X6-XO6zYI*?wJ|<%DTHoctIO zR6RRdvic@oz(~3KiAHl%-;qhSC$S4%`66!9rN8tW)uVfBJcDed3C{aq1LzmZiH-j_^*OIx=8Qm((!YVoIV0JCsqv9c#h zff4z-MPl|##@E`YR>3R}YV|M?GNP5xr7xn|@aH@g4Y-*n4R2_#zEs3k3QRT~lEp_J zK^zn{uGMAapL`Eqs=AKc&`s^JF{d^8DY_WmUd|`hTlJ{w8{1kFP4Ll;JWKe8h3OU! z@w_Lr#2LAW#Y)d^1>2Omz#VFX4UzQ~dgAMP2Q>OqbcZNm(~nXn;}$SOZsZsd!@A>V zZOU;Oq*0V-2-(G_KW^|{9(nSEj7Tcf-67{nSBeKU#$By~gvF{U^o>~|lq=|#i1S+H zhHI_D#V81`8n;dX=4n=tjFJ6rJ-OV2F+R%%zVML8*c)^+>bY^u7=y3RRNNe z^t=IE;gF&qG=gM(q}H3}1}Sw*ashlkr`3r8zO+-zH5C+d);i^!F@j9Z%^|P75uAy- zt^>r0wwVEqeo%%j;>naVveASWR6#hFl^)uN=6-@eAmn@|G}{yH@O=%edn$K*)!)w zw@(wRR;jDB8@Km#k2Bv;qRU?)u!?NRU;`8x5zKh6n8o|$nvJc{vU9)NvPW|r?Gm5a zfo5}5l~>-9XCZmRy{Z=QCyQHery z7vC#b#>NuL^9MufSAK&YN=7M>?_c?_`Gt?refOMKMV5q{45HJmhW(k-g9~lCJ^3}S zz{PpXgpHIx@@Y+7cLFSVv$Bqw3;%r zjqlp8grXW1rw?8me8i(Sl^NXcxRDBf#%Vff^N3ChCjT!(3>+F#f<~z^z^YV@FfBby z$^3EGN9U!OpDdNlN#hHWJMck0e_(9|ceW+;nU$so`;g}uFn2ZVisV9r-p{nvAnDYT zhi(N%I@Ii*+9LD*%wsppC%+ce^0x-~PSHoaH1^;(j8fA$)%CfbDf2Ofc~M}q(BOI2 ziT)SE-lFfTZAuf)!k7Qr-gAYdb3IviI#c^y%ogdGuUXmzBJ^= zZa{4c^GgmtqkqIrgeotD8N;kX1_eWr5e@-{N7-ME!FAQnKQyJ^L}a;?68Rnus$cgq z`x*G#^Z6gf>#6ehq?Z10so&Gxy*dtQ^9W9A6jNX5kiu=%*3*o{I$>KS*_XtQnGB7m z4P2$8AT=yT!HX4RZNwU%lfO9AFz-w?+sC>)a3Adi|8v1B?drs#P-*wPvNZpN8$?uZ z-@pekcf8D3icvHuVZUM}D*62N1J`R$ zGjGT!g57vaeRivzIrUsjIJKnqGx#Q%7oYjAuWzv7B2cI2pfp4EN7s^ueZu>mKmGGu z@+aFsbp2-G8Sn12Z4)@>J<4UijaCf@G6IL9sK%K z(Ei07iZ|4jwt1Prk9empU3f z8l6OF{Go00vgCcA{7v0q{^Z8Nnt(WjtnFFzv4q(h4I%T{NCDoA&?Px!+N4SDJu^z~ zEt6Cd>1Y?td{*@bwyKfbpQAM>xZlZ76-W#A?l`z&z>>1LTVgk7~irOl;mR#(A zM149imq{76E6XT^v?Zt%zbJev3K5{c7(HbFT5fOtkvse*c-7N1UG%wI>pC6z$){mKO$l3pP2az+?Rxb1yvg(73X_&X zHO*2R?fd1Dl9mr5t!IGpV#;($ZvGwf40kiGC;Br~?wK379UrZ+OgY{Cp#dA z;{Z@$6+5k4;a@lFnl@2;cK)}u-kxR7>xyR7u6?E6GD#^E~(TjJ;GaRk_RQAMFQ%TbBjgAK5VDz(Dpoys#4 z&$-lK;aw~f(c%5Hg9os0?126tcgQl$O-AE;bszuW{R)LVG0~g3GOa(4FfNSvVxSEx zZW=uKaQ!wiX@jG9vc}olzDwnszyq$GTQ9h$31YgImB&9fkh6{@uSh6HX?xMT!gQ2a z<6tOIAt zBMwA?!^muB9`O={Q9GG^&|9=s7aH{kJ;IQSM8OoA^E|)qm{uLIeCv?t`Vf1g=E6n< zwZN5Hg+MduZiscx>Y@*Uk8G$sAGdh*EXJ51t2Bp$iB7RqaXX4^5}h0F<+p9X_l1|A zxNnjkLsss!GZMh!I=6Q$;GsNjDXh*PJWV9y0o!Dp-X^#Er|4;>(rDEyhGWNm_mmx1 z)=}6B;bC`jt4!qHX)0@Jy3wL`65qPPQT0kAccf<~c0Q3F@yd&Qf_XyNl-ZX$9xk7$>{pK1PCVIYE7PTU(FPK2GT+b7evq25K1~w2 zFH@$Avf0Up2l2EWW8Vs 
zWb55sFYKUp-c#tOi&X80UKf&}Q6V>D+F|Yt0;;C>jXr)4pM2BW;!`UyXgIIlQ(8fJ zUY`eyJcD)`z^eW^i?*NxmWMF@yNx z=3tMKnA#260DJqCH*^Cq9b|Gr+`b3?hNYwOAihcQ`i$bpsOZ7OF9*ru@)~)vi>fo9 zk0`Wv@3a1`CjL`Y1l`~Z^EHGCWQScadNs3j@fawvzgpp128f_ITF*jOBlL6=2@WRT zfAh!FdrTM0N36LHnni2*!+s5}0Us2$!#h`OG4Y`CWR_j#TOD&0gJi@Z?8O)x+V+bZ zX_~(4F6WK_%nPoghcSEvs?-Yi4+3HKF4{$BL^%wDG&fhQS`O3=otz}ol zRDZEQ>(1ICR8sf^HXScofxC1FYmJxT6gf^Y!3UM9}ZZmst`O1bdo2;sehR%-cTcC&e!xy9@dgC7G7k_g752f7Zs$>(2Q zc#rfGUP(d;7$bBwjTUhS&?eUjGRL7rw||ITj}FrcIQ@v^z!;GG{(kq4j#vaS*FYg7 zd9wM5D(0LU^n!-%qiHV_bjo$>fg?lhOthda1TMldf_W%!T+dObxI7}jviLa!EAsDG zv3&P#NtXOEx=~HN#YuX(7KDj$0#`D%0+8h-$S}|9+KqOX$UZeY00V+3Pk}7m9w_#GM+=xK8OQuS zWHGb-7VN6BFaDkWrm{0W_bh<$t?p7@?DaqXwBGna0;z0K=j(MP*`@AeeEmHs?_2O# ziY1p%Zio50a>o%|;2#uZoj|p$U9h4H7P$mq6qe@gt#__op$8Pk6-(`3N(e%b|Mv*I zlpi&727|dN zi+)}0dxJ;C2$Ti;Sn;$xVg1oPc1M#W58w;;eSan~G;WI$UX#0ze>f#yxN)NU@$qsD zuR=c#eCpvW8IWpN8XNA{|M!jk&x=bd2cE}UDriTm?f-is|2dfdpTAJueG}mO&yOM4 zf8Pp1XXL-f@^7EyfA*(0R5yW*s@V#2760$AYQPUaxhV>t!w-OSdNKwORvQHR4+->T zPRu<%>#O+f=sG6|cK3_N48x?QLS?AljIJB`|KRKTDt-jEsk$l8%)sDCASD&)bpm9S z9mo*TQ^HJ%%UHnET94E=wAaW`wioOan>jSqW2NDkBol0(IuxXryhM;(AMbrBGp-p< zkeTBt>5?_TtwJ zoAjk!;p00q95dN!;+LwJ>*j?qzk{cHG~kebcwpd))xiEOUPUXQwS@!zD9-YucMPq=JjSok<1r^wlUcz)L=h0G{PnxV;cwbEU>M*6IH2zd6ZLVR zoFx7F#+!nN#7P4I4ZT7@el%ee$k?a;Q@@#;e{lf-Viz)0+lW-fYUM|1yu2* zxI6(vwSoW>B-KXVS^u_pne;6f5LNwGA$`=QKwxr(hr)hOfro70Ce|VowW#{BO0O5~tg7UinquM0`f6NJF zpON^Im5;1%N8#m_f5Apg1UYeO{4_KKk5!|%tb*v39{n`PuL=ir+`h&tpgFkcFH-xXKUi+W*hxb72vzu3sgY_ zKl~>1`!jaQC$FSOX}Aq&giNdzG+f_$N967fRHm!I*cH-t3Ovxt%2qPaoU0obXycBc z^Ca+hT%z?I6xZ|+AirJzE^}&S{zHt8tO;n;n77c90@{(?I?zdh!`sza--%YVSn8Ze z83qH#yO{fn0a0Ub_v22^2dR~3Q%{+X!zIq~JH^rgzOLh865<-gM*p*;12KT&q9EBS zbJ^3+4wR{M;J4#W=Cnf$tfXq8KftTL913JlxnCJHJchs91g^Q?>$o@389+6yorC!l zppeo&^|@~^lgh!d^)8!kfm*v><^vr2v0uq~?d7Jn)KeC3tC4er8 zSrur&$y7y9_WVHMz|NUdTHwn+0wW1+>*Ok75 z?5S5v4w*TvnBnu6W1i|{L@vW-G%i46&}{0Blh97P;7k1^_dZ)mvp729SmV2^I)i)# zS4c-~!hmN#iza9CJ~Spc4(XFZ@GPncf}@A2mo9w}Ap@sy9z*v@tq;hd{g_sBrus!j z)T?I&fZ_IpJ&UMcK*et=%FunO_b%sW5eX>Ub;J@EBwOA%KE-R8>#trw?z1*is`v3^Leo`|EN5 zA??Tb+XnJbnBpW5WU2tZYv>e+Ryh+!@ze|Lf;cAvF-_Z3G8BXf0|OjEv-lw21)H;6N3S4vLcLDEGH9#Y?3i4ifDmm*pMa2A; zAV;L5A9(L(f)t?xp}<(TmxriSU~;=Ju@486HV4~HkRwmF*Kc_P=E&-HMJhSbaCpX9 z+NbCQ;37M)YjA{P;$b7Z1BHNr|Ka3^Dzm|*SDnCgI;N+$0H7Uthoh~~yyOG~iRgdE z?OPbfU21n28F}hjE=&Wdskb0QOq@wzwWQI1`-H$ne&p)B*RPG3&3cG?^AWMg{)P}n z%ua6`{s1^so7`!!_x1qd$H9F%G!#<}xRwHA(+33B^m+%v+tK+e#LE@r3_QU(Mt#7; zjt&uu6a^Uo=M*qp_^DmHeVe_-X9r#7IWL$vE%GgMg%CLwXEwYKs>cbi_1`%UqIBvB z;-cStsqk{=9zgp|aCOu4A^tmQI48Th>7BVGPnqNHFabKbYBi=~PJ#ES4#ELdW5?b% z0(}1;%HBFE%C_wrRY5={lpGjBKtehsC6q==Km?>2LZqamK?DiuE@>pBq@;#M80jtv z>5hScq1or$-}CIX_uAju@BRJdTCl+3y3RO`UmaoGm~q~G?$~t2;Pg#amjB7OI*49N z_q0PJBYpKLs+X=n#NP+ZYg?_}1#=7?Fwup~BNE^juF5Hh-r`&N!d{A?y9C!!Z>JzF zwby>@sZ-q)Na*IDqHq3FySNj^X9VO{x-^Arx9)L7jb*^3BoCq^YXR`XltLzUif+E1 zZCoI*M=YB_@bEEY^fs?>H*!#vdTAYiAu&fz5xC|Y#`;Vbm?kkkoc9!j3ycmOcrHo+ zeB;!SneVBgM0B>pIp7&mlnN!(G;ocTt4ICMs;LD(&^$%VB ztp&@*f-m?iQT@BuD#R!P?;BAs_`zfr9#kHtU~&-S=&0=*`oa5xQcq!TIacihiUrTT zM@Rqj-^7f|ZA*jaYajtsxn|n~*^KL9GX-AA3_#T4R_@{K3yKBJ)q%K8xme+?;>sED zmYxcMS6PqAjeqJ$opu1S<+?l7r)@F#e&;uxTPo*lOS|A1T#6~aUz;??YYal_k35|} z?N{a=8PM1pA~6E}3J2x#n7TYo{OpoA*JxTaB_awkwqY$Qadz(z_lKOsVjW@olU3UF zN9&);9SEcaS8?upCB>A?w@uXC4y{hc9_@0TM-(66h{dj-^#&eYvv-LX=HQ<>>hM{$=TGyncT_ zwLmTej{}rauqKmx*O>7Iy!N9ceJdc-)sS`oN+;6}Kdvt8Ydh)7Uq1(lumWm~99^JC zOKm>vv>rMEaGp1z|MP5ye90l zuQNid83e_X2jVXM5Le6q^X0_u>=A@!Ze9IVy?}CRqrY$EU>|MFgg>bb46wI+7IvLmP z5`8LL8*3#Tf%ke2!S{ywGKz;-;sZ!Qm+pd_6h#bAdy}MFNvpbHRm1x+Yx!Q6%U+f> 
[base85-encoded binary patch payload omitted — not human-readable]
z$4L(~(Jr^PKD}&%~%Xo9wRFEG87AY&gqwsmd+v(%^`ixuEH5m$|(02rw*%g8IIViqcxfPnC^&F-Epr{6tO;?hyAe}eKYe8c3LMw*G zlHG?zHi58=>6%14P#DiqlO{EXC+=VG0GVT4-x^ev{$S`Oklp3uX;hLDGPpVuU~rfu z(VFeWS$3Ncor)l;&>p}ZDqSg~cGMSgUZlU~MLI!KVB6NspsX1Xz?K=Mv<-<#f;ZtX_!EiEh9 z?k+A^%dgWp3NkEfg_qj@x&ujfDAia;+JN18OT_2y?UZ1}88-#+HiE(@8|I8n@?Uqw zf`Ess?+4=+QC?zI^}+ zJ}nyn&wnKU9^?e_SMMKH!LF^C*Qmqpjmu_pl|m!X5(Rl018&cJ6hMi_I^hJ=9Q(gM zW%uy}gmw7@4@7~3j?wA|b`iTwOL9L76;!{)AlMr>c4hIMPIpo{VEjzB6}1-gM#mT# zQhHc=HG=}Q1dzAMJQ%u4iEoT#x2hekkzlkZX){5`fJX=UdU8z6Cuz?AqWjN!c{f7)1q;%a8i2H3HNxE%D>I0R|GhLSMf1U~;*ek1JQrdjUE?+5_XJKvT|X*F=Ih z1%h@E<}0!81B)r=dhafv=rgpP0AU1GXe&dLR1yO?i*+toYXoZs-RRmP#0imDEr0;D z2j4=b1t9qI)jonp39G!24fEQf(yFO3YG$(wKt=#!5>`T~9s^5zaHwrijnLWuZM?vU z!8FvlkIPXzIL{Ea3+5yzwa1nUDaMu6Z3CRPm?1f0oG&{c2}vz2M5B!Z^y;s!{u8y3HqkH^6`YW>-AFObCk}{y5-%C8EW$qVyuhDeI$;xD%P(Z-71@j zHq?FrPN|mnzeK+KSAPN%<@)9Y3T2=U(&%(N9*Xkf6d$uav>>-wG5kh1;lB>6vq$C6 z2~|JJ`yhfo0Fv$%XD*+o%0ZV|d4pbEPv8`L7vpZ*qoEa8A8S^+2r7j~A9fDMq6`!d zz9t7zttG6kFN4YpLd&2FdzFX+1RmGaC+y>_$<P=oJW4*Rb;O<_k=n#jZ=1 z1`8Lq!5sOtTv$p^6OS#Zyoma|V}Y6nxg2^^acb;a@Dtf#D}T==thN z{OB&|phDk443|Q#KEz4wMilFkg(YapEE&G_CGUiU$$RzkQhYVA6hJ+oM*6KOm^-9x zOw%O`69o`(c2gHZ^&CcUIvj- ze#S8Dg8!H6Mi8I26}-`-)xY`TG|ykjHudp@GFm!1ZYQW;MnHxPV^du|+{*D-Y7`eB zNui;M2oUB&e&5W?CCawF`PQFXXIfxQV4`;ItpXjj!6Ob`cVs2AUX=aL9WPJ!F8m$}{!Z z>U}yp0o@|c%UY2=h}yMwTOf(0f-azb2Ow7GY+Vb4ZhzX8sjY&PKKBg{=Cvwgwul>* zDOX#GK6RQkaLbwe;;{w2oTjz23s9+KpucrDn{;dT;2lvEZtu!U;MkkF)W{$no*TN{ zwRcwA^$m&Wo!^k~Qa>!_Pb9kTOY>Sm`^jHd-a@H^8`QKr`0bYBOD}QW4`cr0;c3uu zARN^`KItBzq1iyMoojSkas3d8+yPN3^ehcsR9tJN}_$0+z6 zn55)%^F$d5*g3e{P8!+ zFL%TV9iDL&hAlryynxMJMZPzR5yGz?<4ghd3$B*}wR!Ofx69nGV5<@2P7mB=F@Zi7 zW8W_3_{jQ*)5ti3_P`7MhMM9-^zQxjI)teJQLS9Hd?SNakbJ7OY+YXE;80>+?G=jR zrqC}1GL&|xWoXnJ6IxL015E0wpsKC1NN7Iay-<(68xda$6}L(ph4oZRFd`So-ryoc z){W5we(?BjpnIPP52MI%_1Psr6-2b|P2@|t%`1nf?$aUj{&A-Q?KcC$7&gE{|&HJmV!N`RvFattqXfJ?V-|sDnc0#G=<>STRmEBNe9fx5vXg1Iup8~ zKM*H;YzIYYv5r_L4i@P_(?0KaT8C>tTdCGtY)|?IAz*W(BO*`hP5Uw0yE7rxJH_FWhC(Yu&MjHPhpTg= z@Jm$@g2TY}98K*J9_}ZYK9kCLFWuM;wtlBty)Sl8_kp%aBAT+oU@ezjImCIr^E(U-$@ryfal#pcYwB7(V8v zZQ0UK`{Ss=OgRLpE@-e~beC3*Z(`SDt}b^a9v49E%)T4*QGO;qo+fGRnGCqAXOKy2 z-}}OYL?#Glf+ek~%6kxqv`vhRxW}T?r`!5?pvFJ1xdCgb15TN{P;Iyb^ea4cUHoX2 zmAJV7`WeaqeOB}%$|N(WT3Pnzh(n|^=wZMqfM6y{B5e(1AC3<>^X}{SmI}wd%J&lo z?TZCDYW49J9$O_r-)k>um~00v(o29`>APJEq}nP4iD*+Gu82G|*^763`h$Zq)M7rU zB9Qt_KT!VH>(0>0@JBnncFPciC~z=4A)`MF@EC03_YjaTBQY=)s&k^Dif~{6r0or& zRMl7JF!J$K4*h}fxg&Va8jR`Pz?%pZ8mxyrP(6GL>Nk;PP@F^lO*KZ~v;+39a~6Ls zA~A*{XOaj;=Qp5J0_nJ!#6Q}B6U#1uGr#^)g!??_JTyeWv+4;Z2)LkjoJ#y1?JVfR z#U)Qr)T}!K`W#2Fms<(RWr7fHS=q)}-3Z}g^ml5eK5wDnB|)k;Hh=HG{^w6bA__XK zB9o-%zy3~P;NQYJ0q`e%?a2LGwV8j-m<{V8Dfe}%Rp1%7ha|OH4}it}2eb*6fyVp^ z(<)Geiyf!Mzj-PQ96B)0~%X;990i)%9hL1(Djo(ZT4+8)*AV zKA{TqIA?A%NE-vBKWGo{EFZA^+r6tk&-`LnPjiUyBLGU?J3?CW20vHZ(UsE-7h}v~ zB8QD3UN`MIebCAP_?dD5Fr;g41XU8XrTMv&GrIgY;wh zqaX&(5(hZ?4M#ZCH>KMFN?%j8UTqUnXi|5w12lj+9KrfGt&ILH@YuI1Iaq3bx$@+A zu^MU-LpuADawSx($MN0>aMvvh4Vp7Mf`p~zV-F?U zgDVe7+B0%kSS7QQ>EHAl?P>d-owc%Y8T#5AX&2?)2aF-(kYQY{#U+ZL-<^KCm)ga2 zWe3%7sp0`T&wO}Quv_4#EjeiSmAdNEvoQb(e}pYMQ#wl)38Z&Dhd?bTzW|JJ-G&6$ zn@U9uO>1K4tyr@dLp{NZ_N^d;U9v9U(%b`vn$7(64sGwdxZWs?a zp=@KnVtEd6>HKrQk^|-EHv58sNk?>YdZgapLSU0mEWv zT|d|YAKvr1q_D-*$XAjZb>|L%_tN(;PJ4vB!=>Sv`Rrh@n#|`>3I1lc!fd(%mE!{_idxZGatf|f^KJ#Zh0GHdIv#!+Gs7fC>pgk6S^NXZNRFr+9&0dvjM?5^BSdOvGwu5R| zpQ;h`_WQm`sjt9(Rd{KYnh*Ex1<;joHr+a+b!fkv3O{ajtO!frf&xjiUC68Iq^*a~ zjoL&C&qhL>GJR_sLct^vPyj(3w*pBiLTd}Ca#s|}LMlc#smbBs={0Y_)@s}xq(Dn= zx<7o)K(9DO8OU}=zT%_~p<3WL-L`L%e;*~Af2TRlNyosFIe0Aw4aRRi?g-kPlltDX 
ziSBp8LZ+s#(~`~6l$AS`TPWkNxIl({=^UCR46rXF0&PkPArp8tx?Z0*UZsFO(t=A@UZ3IlQp##Pn8!hm`AJ-@D?fT zEo2s^{ah<~jknD>24cI|jo1^zY%)=$c*Wj1OJ(%Tw<|r%PY(hsTERA%u#t-t)@_>f zTJBzK_n2y(!b?0gu|*f&_e~)k{azgs{q;#A~RT?{UI5AkaHD z6B(n~7v=7a?`i{s9W)0KZFRi5@hvyar_Z27r2?c#HXCZ->zBzqG`GmJiGQDE@d zqK(G-%?UWX8F>U5d2W;9l|Nfh$&s%3?H02~fhIxQ)x#c*DlqNFHl%dl$?uLY-v*dA z(xR9=9bVfuge2GA#=FHo(aSmYjk(v{THE~oKuFlH z704)g)VS=zV0ehdi@R<>T6VVR_Eq38UI03pn~6ufdC`aS!Q-<;veJ_H7rS76&rNFZbPGX<+FZz)^h>@{@v$$|QJNWRe8z>-g@$Y6e4 zNiFd=J8*x5)Qlx<{qf17nRF8*$Tdpt&^0S!lrHkN(Hp4Da+Md4H=<7*nv6DD$$)Qt zW=HknFDp;Zg!4Tco+czYC`~1Gw+h0}{t~FI3e~RMoe0=V;6YpBLhE=g9kqcv&p>s( z>UO1Q2{M>Vwz4Kcy*MWzUDSzNt0Q!+3=}KCcv4jQUAjo!*NX1@cy!Zj$}!z{dtvxg znhAMZ6a8biA@A*yAy`#utmn}2Mhy4rKR1xziPHu}T&liwhF=c+I)RN~FeVCzwzD{5 zAnyjVc+?{uPd>nBu#1Wj|N6UZo!tcmuD+bb3WBrf70D`#wVqrK+l6}_IZk;EaZ)jj zGwxJ>{uBlh>XD8^7juHCmVM)H+TY$?0-?k}=r2G-(+AaF!3pp5573OAt`8cKdy%wj zm=CNA_+Z{rF8Dw~Nj(nd#QMN})F5fWQF~6*tVzEvKh^DX{V5dXO!m_}Kfg5Zt)6Oc zxp-sN?Q_Ho{l9nn8(w9Bh{U zNnC<`-Oki}m2FFYxjS6~nsZ*iRra#BD%f?BIsPGcd4WBcqMf!31|DHBVaiDsfP!X< zOz!V*n>bv`-I|yk)BObQL4RU=z5>PlR_D?f@8*1^bdCXphhcD?X$;lbEO~OcQMiCR zPqzbdL;?!E6z740e`CwwYiB_;G#qqVN`X4QudcYr}8Q^z&Zw%lvJ~ z1{e#<0~#Mru=^&jN9z2p{GhKZ0UOaUkcEoR2(yfwM$u5#3MO9PECZ#dqZTB^J7>s}`5;MOmafd-06%xS;@68Xl+n6(d}eAh)=vjJxpE$tIeZ!*oPeUll>p7%SJ0##s7E(qYsFU|k?(oO zZUl6b@DDD3OHX(Wh3A@PEB-@AqbElO+u}%Ww_i59rQl0pppm^+o1ZUGV&GariH4@m zFSQ^r4Ax@Qy;vm8T3am+&2;fgrMp3uk<55=j>_tV>mVsMA3hy@1l4j!8|SWAKHb=# zCDIq#_GEp_KrLhV9x+$=_+&6=^I+l|ud#TY7tyI1H12CS7161yGZdVyF6xbgP-Els zhR^q9^~C)ry#%$#41NNXY-2M>~e(%oK)I^(d<-g@m9g(DQc553IDx-djX(<*{PW+rxNp)et@;tY{^ z2inindVeB(9L6XI-LOtG@J({WG?pTs#bO}DH8ttPr+zgCvox-g zn5fKd(zgP`Ev?+!BR1|Ev8O)VQ3Nh9{^UX~hZwG0xYZV-XLuh49U6S&ux%uW%bZz^0$- zaG{Y?`O3S4wU#^wmGvbV6{abUX1Xsk-+?cd=aDST~c}h+bq{CpFQa z^xU)YI2g({tk=g{D)s>zE|{zw1>)PCll;UnVlrN9UAte`q7UV$J%Azic0}zB$r4Rk zoVn%nS#%e;``8o@S_3-RZr?Ze@CRKtT4)cy6YVKXT0VacZZo20L}nzLOi}9~q~{>U zS7j8w;N)%2+O~QCO8?=Qy&n4DkHG`$xugjIJ9-n|p*4sXeeu>uZ-?E|9b|knck@F- zLf%NmfP1GY+K+?Mlc<~-(3|CqIa!wsBg#lobF?;E_R2w-sbG$j_*=r)Fd(msZKy)p z=jHk3BXaZxim!eO*g}Kos$_YW|vU9;F_y`$GdvAA(r5PLho|!GQqZSXd1&I$7p2APx;ZEdmo&g^Q|s+Uhkk?PmyRcRo; zr2o#wJRQ=9{)-J)fBwhncxBr9hmfHk{f9RW3&rzGqOwlpZO*zIee^@k}|vY_#|c>(&2I~WLW*F!<|3X;j$JB|E|M@ z|3ilh|6PY0S$SysA3EIc;*bt^P`60n4;^mIs%4a1&nQ)ys_Vp+XerHJh`!3sp^;FUkrTBczHYdt6>)$1~jN2+E8&2Du$ETxj?bpw`GtpDj zX%*0gUF>6>6yD$9zNR35;$*6pbz`bYKj+T3t8J?1;=#nI<^2RPkNG;S2UcNP+mlZF zEtP-+rd5OGv8^l1r`O+$N?-6s{lkLmFj6v8oD1^=S1)fO39PY~(m1|TNDb9Ij z-mKoPg8OS0w_z3eIGI0L0B)F@g)QZu!%LVWQJdH3wyA@h+z#*k%pe?_j+k_o zPp#A(GY|>VvYD$k1mq<7D?}#d$Tog`X~6L?eclC4k{eoDmkrI*YQ5g1-@71s9b>cj z26aYFOM{#PSz(CG8F(TByW z-*Mq1edB=D@&imuHPMLY2P9u}@YqJ%>;1Amy&^p(Q(ZJm_(%`T>Q^czU?K#Z^~l5* zd1xj7+lU*91M{EGn13C1{%_FIUE*zft^G6^FhFd%dTcOF_2n-27~D!;I)ZCKud=yS~+<-EQ-lW=knV-6JtLfbMP$CpfjVVSm#5L68ZCiYtBi8%{ZW#7|`thJIkl=hRz z?`=B4Z^>8h{blGi6T8d8wF|9usYlUW5Zf=Xw_7cSu(98lFbB5A( zr~R$1P5dL~nv_qp337>mx%Xc}T$cu?Z--K6Nlua<7+9J>;z<3wnmcj{-s=UzRFO9+ z>3u}JD?z2{+rp=-dpb?WM$xsSvg@8F<~cJ@>B)>}BsK2?ddg1+{%?oal*M;4{s6H` zm{U_(ptlGUKk>ZZRuQ=KDR;zVq4lcMxe=u<_|NX5xu;qaO@)4k!`m<|*=-w>c;$HD ztqw^qJic7QaKlFXV{k-$DYjUQjBgQVuOBLG9kZ$wgTl~R($)3X9$;>v3CM6S{$FIc99mqlJnkA_m@2-|ZzVk$ zNGt#Vm=z6f?3b{JpT3Tb3Ogr!!JbK4>C(<7*!&dCa-95D;sJ0(zMl`59R4A))vdnqT5)gEKU;A*^vv&9U}uHT z;?{2(UAK|-kfrX#`b+R%GWs}1CUcE4v2X-(227oH{Qy3*o7XW zD0fPH=QdKOk|PRLZ&~2N?fxHJxFhW&Koj|g3HSYzPC|;a$He=nAe8*td}ZBAF09M6 zkGjMA4r=Y)Zptr%9B-QHqdu&Y_PUADhX8jHB~!d0u{wowj~W76ZoGFp-V}God4K#>qrsUQ%Pybi%FKkhQQyP%ix1uUl4h{M%i`I^TS;x-#tkW8QY(x)n{B>^2MM zM8+lECi|~aE;1BfI1cARZgq%(QDLpwP4w{)L^FRl&vbyrGV>#?19;JjFzg1p?%S 
z5MmLrXZw21Ax|3LN_;)|o`U(o^wPvtC7L3jQDf^58SW0KqPo_S(aie7MtyHA5Kr(* zF=7y^6!EsToC3pLb0Lw2E!Prcu1|`hG3(1<^1Uobs7664T~#!inuTsd{gn)J;bM$% zbR^2`;1zOGe|@@fB_zabv!4~mu^=?UI@Y+)nj*9-sifWqNK5Tdq}S-fCsH5dwVy91 zm-b1pOo*gA|H+jBgQnsF(!Gw#jCtY5lV(EJrr3!GvBZi0>A|IT;j37qN$zHP$%Fwg zCdDbo`7W@@d3}loREZm;Zd(cS8$WOE|HdRhav$64mhkf(!Di`=ILpUVq2X|$EoGgB zm#E?#4(S~eWCnm$npu+FL`=W-G7`r$t)>vW8^taZx>QxUVczm64mX(i6I z@>j??Q@~FM?j1hQ7jyHM4pc9uhZ)D-IslqUx?Yw;F1JhWleM0Fz>{$E=&Jjah>1fQ z%;6*3g1~mn{4ypoB4=qQb0ub#WW#UME*q(luhO-G=oL!Mrcj^0{w*K@_kH$Ar_Afh zLfoyCz-V6dl?Od7Au1#J53$&dwGTqRm{2kuEld3|ED7Fu!9NVRFvs5wxXUaImY=s? z80+1=AD&CNYWn`QY~zR3D^%vY$*b!-H!0P63uIG%B?VU6G6#8oMwx^a-J0_zX?&-j z;%#>B9`N82dOu~q=HBxC8Z)8tmm)k8vU7cpoU*R*8w<>iXwHmB$1x%mWJ~%tV4U-Vfs%a0Wn*$`s>G^F0Dk49mZTEikKcSYPdjATWzpc)V|R}@ z2$o)_;8Ef@87hx{heT+m=LCM}A0wdtOhqqLErb0-hD#snr7!Td4C|VbW6omIY}J?B<<;?f-HJ97?aPd9RTQgbL2PR!bar**P9A=gGjqAGb8zM|*FfD*LS{DKCGrpy ztc*W=9lF8k^7R~P;8z}0XvOkKey~F9@r?inbd|P2-=Qy~s>0Jf8vc)O$qqxcj0$Q- z#VYmfA5!CfNE`ED@R5}Ej8?gr!>h5u>c0cQe*j*bPt`(TsjWLH7@!(9?V_!?K#}j- z&v1xufI+@hln;)$OsC&_ZW-ZzAy6nfo#mo^E_Rkl$~BOj41beR(SX@K45ZFEt#Nc{ zE15k`|7N~lismrbEy*MDXwfueq{i2InEcaDecfpK0tzOi+$M~oaROGP;OcQzMyq2? z(NFbYKUt@=rXDZamryI>G2%tYMp8DtK5JCl+?CRk2QKD&k1&=86U)B=nciwI&eRfp@2hrGE0q;zKeX`ayD3W?QocLFy_SZf4srV^c;{He z&MCqmx(ya<>;}_OoV+%HL1?-hF?*nTz3ZsL;SY=HdhNhX1-v^}N*~^dvXX_!KN#3T zDV}ue&weX{6{@ZD1NJzFPkv^WpoOYVeJLZ*7OlF(SEMyD9jckRe%f{BCywK>CEoi$ zMtey9;4(|suN25$dHq61+F|BYlI_SRE$xxn;M<{va2m8=w0iYrS`8$~H~~Yi@ylKz zQDvB1SpF4m4PhB7#yLBFU7Vse2n8(7vzm-vh36?siCo0^1Hd>uKECu1WM;2K7aB%z z^5K<&LC2ANW4$0eegqw}Fd3%WYHB{s!)SG%+dcsmpyzAldU4@Vq*=}GVRq*!W)D5{ zMAH1bJ9AwY>2S2~E=L$-1o*Z;lZ%CW1f0R!BI&D6@z!NCx%fIMO$hCG6#z6Jj=%$g zhVBP3yEmRH@`QE;m43_?eTuMfe9k-gSYf4zCy_tVypul1*OIEng3@!#QK8&+dt4^-Jx@~!|C2EK7NM)!8EKID)9T&wL7tdN^o$QAXJ+?N`uthcum&g=`Wr~ z+zwE`Tl+P$L>{9#(zLTCPG~bA0?2Qr0l}c`ow-T5tryu;JfR=2ES;a9BSTGI9&^ey z{v^&GgVvW`7xISts3!)Q6+=D!h1&1bb1-ui%;r!UukAjNpV#kc*GNx3|EOG)&1lt` zY0g*vrDBx+AM#tPf5>myJ-*)WOOV{n7rxQ*hx~To-{rUY{n=f84pPd$%WpIOMSe>S zV=Vjy$!}LbOw!__b2-Ot&}4q}|FGX8l+cMhG?jbnN}U%eNG~Vk+-D)t7ZL-BB5j6NGI(y?0`KLK1S%(^s^>v+k|^2vtktUqN0^(s&wt#>z6VBVTf2OA zfc@4vwQC!tZKguj)Z??RGaUvl62PW^DKtGsQkQ>ig~^N6km;^{zI^!tInfp8*>21^ z%yXA4?chK=u^d+FSau2idMC9$Y8+9umVp||8_h4KKePjA*z=;?Xfj#TY_XzC2Dodn zJO1mlTe`N~L;c+r-L#2AF%)ph`QawSeLZkDsw(@dvtBLD)5rqe*Gl>WgD!>F;}%>) z?ug#2y@MQtyN&r>rA*fv*eYe2PNncN|E2{jco-eyJFt!K{v@~Zspj7ESqvTOr{`GE zne$0Kbma;gCoLqs6$V*8!c_-TNcTzr$r~TPMsy%Nqe>$h!vQAh09xcz<82!!$Jbg;P4aEL}%wH0#1*cVRYYv%;rleqxHnfLvFs zEK4v_|4NcacLGU()LUX+`k!l}oE~}!L^!RbjBh5o^ShZ-eyq-qBqM+3nT7g`FEN-r zk3P)oBLJl$XQU+n;_js`+eo~5Ah{R9aYL&9)6SRW^enr33u*JSbq+cUkpC7?1RLy9 za!mJZ+aUiA_2H@HdV$+~XjMjQ!UAv{`-#f)R^PkweBS6>2(k@U_N6A<0s6blvLex0 zxU<@`T9^DHQdRGu3;q)tf+Qq6#_w%HmD@+=$Wjqnn|B|piQ=!Jz8M)xZ<{W38w9hP z;*6fzKO7N^Jig-@9P^$h#%vg;O3mNo90w0}zrE(4?TkduMSRdITCD33B7mLsy-(RO zqaY1Nhb}Rm4ksVosoG-hxSPU$fd^mhTwYS;Biq}zENQ(Gc}U3<8cFa5ia*_@XW{w( zT5oeyul>V%d-1=mw?n7a+pP8;eB$+pgl+fq;46lnx2@0`gCtX#O7maNYOaTK#0nbQ z)(HM|Vm2z7=eGQ6pm9d8Go<3OHB*DjQR=KW_Yyj$-kR}TlsUtRYSoz6GUcb{HhX6>FkNt)BH(ks zNOFYtmn3zLy8+j)J-%+W%L(traE$9cx)Pi8*k-y6Hkb3|5q{dMI8cQV!k9_6DY#XW zOerG=qX6iCkbx90TG%@gqIY|-#b(@4GM{82-(-p{x%HEm%V_JGjr2z$YJ2#%7N@-B zIJ!@Tq>%E~C9rHV0{~ZFe+qxt^cm^&`?~IYv6++aB>G}8bt-VRsYA@|TLbz9U>YJi zGu-wCx#zcW+K<2a7I^DY7Rd)V=$T+i8ymk&am=ev;VyJ$@)upw&!LZ^uZ!heiTOCD z_cQNXS58^=Cq8athrrBdJE|PTrV>fn`vF#ZY)dTq(qt>qS9VvCTc7HU5B8Zo&tq2m zWhRjTQxSi@oeZ0_*FzX~Bk~AnP-Ow|n3F zJ_UI~$(YSirkt(!;3}W9ThOb!uG8#{D#alJg{TOo;3pj|vsV}3qISCKDfp1h!K*&?Ub@h_Ul4eOv)4W@mAHb=8PpQYQKW*O;w_aD zW>ZfqDezC0x0X|waXQh5aYNxx<v>fT(IIp7@tYTOk{*p0%I) 
zo2iG}Of_6T>&PhH$l7hnU9=k)70Of~W>M$9Jav|ujJMO^d_E5;(HDYRVl}ac114H- zuQ=z-7w3PLAZ%_VGF3k$E*O8O)R;cRml@mrO+kdLwvU!=vY&hKt!&vP{ItgvOv%hd zO<;ha%Pw}<1jaEXb5|pxfQdX+hkWZX04XE4RUQ|3a=XRtdrQbTspZclKRf5UN~PXA zw_mfedTq`8gwIB)Q?=IB-`&@rKI@z*?xtM6XLXtzvfwfhgahccsK3O|dG1W2hx)Lm zCa!gdkIeYXFIHN*hw^>W3qYG)cjZfbNQEf6nIts5ndTPAH+NW@gHCOJn#a**+*f)* z*Nuh(pOCk6<5dR+4~X*Q_mU&zWDn^$!X(0udmZ${9g2mbyWodH;cdnrx|&vz5`{*F z9mC1xbK7F*dfJ}aEpmD-PmLi3^XUE+jXnSPbuERBJN0kk(5V zaahFhMovTz%_APV-H*PPkk}J&1BZI**T|dw1@}ttZ&?ae9k+%ZrBi=T?5! zZ-XM74KX6Q?ec{_*Y=G%R7Dq!Ar-eXNX6}GP3YuPq}_f{wo}Jxbi%5r=G!W$Qum*| zEM?WiYTbPb_V~hAIw=Etn}&T}H7i6YCO4O(5sI50002cP<|yR{biz zHQq0-=gPT5L>>Rs!sl$A*g|A^BhQ z17zsYy4m?(+846i35t9`Fbb4(LW(=b;j6D4Nf^m??zW5eHI zz>dSxwv;VoK_Yq0KpfQ&C}C?aXID<&T@y=m%}5gsv?tA0lqjecRg+r*v!!nLG%1zK z9gH(Z4wRvgUyzO*|8ojt`n{biLKzH_8ByC_ za*$y+1&#B8!-(?FOkQysj4Q|z1<_p>04Y_6$NR{m& z2j=HW1!TCcY+i^`KF+;jRrbisW)i4uf3xjp?|;aT24MR_EqVj3KUKEiCgDG;Y=IVH z!+3J>gx2W)kIL5C;7^q;0A#e<7=&6pzu?s|sj6JTxjg)$vTQ&c+~G9dZldG7owysC zsgQ#-RWl$Q^2sV5T?OdUjy;*CdGzU1*OEz5cU}Qe{5Pf(xh&o5ufoArq z26hzEO0-f?Y(v_@lUKg&UM51jP`%-*EE;zKaZhYURG4jCXDr3pi_g(rwh^9AJM@o8 z73b?cvv}|$&|=^V4BTV@Osn=?&3nHL<^_a?l!4H8aZH01&RxazTWGtBvAO)SS~--7 z-(0=fUtl}CmMF|U3TB~^Zt5BMJuP|)TwqxIeSzWXWtw`(P|4Ga=;C+z+EdSpau<5e z1^i&i-|Kq-jPwD~Var$I%_Uy^<_$FB5+W6PaTk1llak|hi=cUF_lH2Y#a0{T7~-e) zqu|${I$M^%bhc5jGCQJ!ZCcN);M;yY^S(|0(AffSPT2pfv(A&b~qkzs< z;p9DXvI3p0Tg@&KVdjAGl7inTNmuXLILtQa)3Iu<>p$ykK}Xvmc9A+02`^U@QfIsI zTW6bbufj4r@Qwdxe23Ke4&SHEb$+1~;_W&(sf;9|;Hv(sq}C{|ns&C4z0LhPGP*9? z(X-ZV{y{~y&m4%O(jX9lgEB*xavNLZzO>HAjqeAw52r!DHn*Hw-R_ucT4qdrh>I+c zQ`bLQt0-71X$d@OPLu&sP1TJgSxwL)DNdUUWVm78PRSMe_YlW~BcurGFb=P0Nug!p z0iGN%V?YUuSVOF zl_3F7nZ@#-t?r+%hdR*TRs&r+v@{FD>Fwm#F_)m1a?R;$KY{H|U?!4)2FvPJx+`$n z8tE$ua{U3UEfvR$j8>V`oAjCtxN7V!KM~PhQ0LSPlo7LX_IsFzk9ZK3Cv4$@1+VC{ z8XH(AZOU;uVf8^VYdN#jQ3dISU#4T z(x)c;N^9_R{cUZ-UryT<_vn|J8hsvG{+V;%@2h|UU8d}ZF9Zd{a^3@z1)_S-vX+Ch zyC!P`eCRI?zdm3p@K8?QoB`zjy(8fOYvg?i37els=4zyI?hF2~Fd5PkpobZ)&+V39 z1!~NVj}%g&6_H<0h-Cr_^rZlQtFq?X#8n zw?5nE8}UAxLy334{P)BXTNh@AUjQ->xHeR%vn=oi!T`HT8(Y5O=l4oB{AgqQOoa-7u5vYdBXue_E@iW8G{mZxS!PrlDSHgE!&t@L#T0Om zrALr?R{oL1CwQmBkGx~R-?t&S{O&xgZ{~G zwSjhisGOwWS}zdMg^IY9biT+e+^#pFQt0Z?Ph%deC%IUJx)cM^oLxvG2f{)e?bXan z8at`PO}Dms{5_+JK0)Q4pv4&bKYg}?WWRm3I@$T?bTivbOIbbXKZ#e`cCtNX1_!0Y z^ELt8lA#2>-XdnFTvprO- zFgIXoNYB_Lnnl!zu_5{9En`_tO3`Toc2^_MBz~AiIOyGs?zzoa7>TiB;mMpf6zvho z75a)EJTgdJ`BP>q^Oww)?%kK4BPbM2@9)WDA)yZS*W%EMy!m(ny6**@;|JifTkjB) zz-Ram%0zoxl$xao-!vA0ddRP|Nj7u?(8Fxxx?2{yi!zo^O&AL#@P`}W{k5qeBz62N zL)~py_VdN-?w6fs?1Pt&l`71?hJkJ@&e+!roIthQ?)P}VFc$5!Suk@LW-&dkjUoA8 zLR%h{=FtRDC{Ql5)T+j$(s+S%)xQXBL6;8@+G-)YeEtWat<}F3 z+QN`RTW|^GGi2nq&=&aN$&?cQ6x!AUmgqkFA6Sd0559&5Ad5{yekKWjom)Zu`=iH{ za?=3nwjMDDH0ICA>dZSJ8favFp17y55yJHG#QaQW39mNH&Ub&q$%!=iOR)vA+@z%z zPF1{XG4$I%4Vr)!NdbGuRZ!9$Z%YT841l4;$+^s4NxM+I(M-`3P8Zch=rjd$KScOn z9u3Bv6foU?YWpHwG^KgtGsNWRfPO)&WazuN_odv8_Z#_#c(csi~;Omy#f(80j{Lc z&G2kD8a5Dkbh9=Its=%DkJdk?Q(amI{6D@~NV5b}?LYfwqdy>t?E{-W!ei~~^pW}2 zxr*~o+G<25!^LHDg+@9ApE0em`YGaZdMwlUGLWb@L>qfB^nK!BA0$b4a=|Fk5hvsA zlwkBSfKa}6F{k{)e3>0;;pbP>LVy0ARL6GCbt;n$QJ-q@rTjHD%Tw=>CJg@k%0?AOWGt)KS91r4$) zbOxVL7A@(928CS#&yp1MgGA+VR7k%KLobzARsD`yQ30HwEd(U4{t7 z@Mst+@toef7diYW>s{yrf~YLT74aWA>q#to%Uy)ekJg-)qWmIs+R{+oxqVW7?JH(l zU&Nd7HpCet4;a)16y=K94HT}hJ3(JKnQ`(BrfcHi+E+dDPAU{$LT_meHjKQNziUs} zRGQ~v*z1j0w3=R+hzG{@TU8)UT5?R-cmIVElp8$A0#%Wi_BPmZ3Yed^ULxS780S^G zY9L*WSmVhD7l+tcH!e44k(H<#EcW4tD_cM_`+8rKC&JUC9W_z6Gp6F@;s<=R55I6o zI)Ym2{TA|QtsZ1Ox{LPp(Mv+C3XC5Y!?4Yxr?Ao$r|n96LG7wiGAOueci={KLx9bJ z36pr6cL^MKjv5J}{Wwt2N=T&B{G;;sO7N0jaoHpfeGCN6htj>AJzEs(2I(Q;Hb@sD 
zIP}iH-74O9g;MP(Pc;1y zs0-tcZl&x(}=4HCgW0Rx^111Fkg&t zV71rzos|X|sz4*zGv))%O*^~SKEE`Uv@^o&D8=crH#OhSWFSUk5J8E5DyA8l0xocO zEx``k94Np%%GQitzxq0yauAhc5W2DUHDs`8U)#Ha(N!&Y%$RU1eG}Z2E=qPJr7C$c z;r(+HTwkoFB$p#naw{S#_#@Y81rUCS5#wd!ZWsX(A4hkiEs$#2Qlkqzl?_GS&|5)AEWBA-crGRNxex z8^QCyfHQ!Hpz(#gJ~lJNIh=px3(JGUxGnJjcAZbNF3)B$f&%FXLa`_?9%GA4L_V1{ z!X^QnA>5W1dARH46ykF)i2C*4*vv~`TM?rwVlkuWm$nQ9dVJ4mQje=|c_eV<#Hz=Z z@S3ot8y{UfxPyCImuiQn1C2+?(W^qTB+PAJZr!3aFaaD$)T&s8Xoci1%I%&nx4d{2 z1>uu7X&z8xU6W3oT!F42D*$=ki)_k_i{{4zWMhA0igfOL~RY zT83{w%?v%WkW%p7aI78+AoNuk@6&tgLYzfIGBw{t%d-_uk$XJL2{3?c=Eqv6!Iv9- z%nS(5%k(K>OU`6x%a+VA{suGXT|T=+EbGh`THfjp3J@R_u;b8?8LV(ktv4m#G_EC7L=y{nC1M z0}2P##!6QmV}#M4rU$FJK5%gRQ__fMQO3A+4av}a&P=4|a_Q=1dn+EF5h87%{CeOu z0rlq~oKNpqaBHh+YW#*TpmRqjQ$^*o4JUOrt4Sw49-{|I7DWvw?{8ES z?G)@Ph=!Q4;3JhdqPr?kscFpbT2?u5s|*8B4C?yGs25Cd z#bWo*LOJkcNQ5dGb4Ogtkkgmdgv0*5qv54S8dh?Zh&BhJ7`4&c%Bnh5d&1>?Sq9ei zPhxcv6Yl3l1g-M7v&#|Ehufv*M+k7u4QtE0%0o>M#rt@59C~S&NV|$gqH?`ma%FGb zqN0w*#PA!(tu@x`4DHo}ZLP&P-2Jcck_+yzU}6dC&tvy>Hn6z*rFEu`XIC^s4hA#5 z;kz;r`VNP06OUG|t#-7O$*orURh_Li>(@TroEk7MzhYH(3^QdR(t0#l9gt?36r=0uPMy8=5Y;{r(OM4b2&_G+ZT}8;TJ5*G zs4iZ)mf0?!ns!(}OAljo^THPWfYYM>i_bJGD0oyg6LYc+ zguBI7I@{m_?P-_2-POM;IsZCb>3W^iKC<$@-s7uIlWRZMBG=?0u2pJq+nK5Toc=;$ z4`Q;hytKkftExe>n#Ip2!DfBt^EQ8*g0zIz6_#5>rb)-j7(9V7%%22RFKx0kMoVj^ zfDxJ?xIU|M@~Ujez48adw)t7_Ku_bK^3Tv#DX$^ETjCS4vTk)&_Ma%!GHc zy+7*k9fwGq*VAHwpwzqTUfL)5ZUJB6#Hro5l^w1F!dlv~WsgzeW}LO~m^s>h9EAP* zGU$m{S?{?1nU!0=)1V|>Sgr|klSN;OkCx!u)Wj-J9)oqR-n!m|a@=Av6eiVmv9}IguA^e#s$JQv6Fj zg-xE<&RGO6L^I_h{CIbwnZU}VHz9_^pWTAvb_`)2y)dcTG_CUBPO-nwN4aj299ALO&klkt56vZuD1}D%^BzXls5N!e}Fy z(EI|;c8F`pi*(pmSHg}U%$05&{rnxXf zF>jdbv8_tSeSV!NyHl5&Agb^+^kSM9LnM=Yp~o?SLfxcHh3aS|~GE%JQF9fi6hP)axzg+y;T(yPRAM6s0 zt-8!z{tnMGeTOAbAfR(m=H$bLei2{K*LiCizq%M$EWUt1obP6Jj%=kShKGo+kMWbu z;Z8Z>#tj`GDL^(3DVgw(;^$mQ#-zcoX14+9Ua_zncWf$VllPSHfa0)f42jRu%re#* zJB1M^)?eVq2zm`f$8ulmP-BE%Eo~80p1{Wv_N|L0Z!Gn!`l5IxctzLYThmQjp3)QC z`cCp(-m={|C;SObnD%aB1W>tHNG-Fw#(*K&SX((!Op%Pc+9|E8S~a}icySfi%|~;@ zi2_4e(`tpvYP%eapSzB~oKEtYLkt`;+`%yO=d@3pb;#0BtvA{@N_tNA-V5MTvcHti z;TC!Kagc2dn{mpNjIf+n!>=5YPAcRgya{^gCPn6gA_HdxrV(XKeHH^=Lf+Pl7?u~8 z{LJr%<`Pn~mq!TYR}zkKYm4h#Xz^Ezwmn1qtvDhqF8Z1W!>gPo3;erDv^F&{-fd2M z6-2~}eG+5wiY>Gd#A|ullB25+c(P`{n(RI}5V|Y1N~nla7ET|kiq&RqePUsSQn>dfty$zy@9=bugG9Zb+EW;9 z%Oz_BM&%A9p_-yI7Q9;dg{sQxxu5h}o;JK<7d~R=$xKy1Sr*c*V$9EgiMvx! zv?i8W=_=h1mr}FJGJRrnRNNC@4U<53E4`~bT!VMN@MPxBnkI>R;A^VGN4 zZhz!6shZ zC^@^$t6E9D%#BWM%VITGvP%}vFBv*0#%ohCHf{*v`D(!?cSzy$?y~fmCq-dBqmAqG zNM!+!@s-?cSYT``Cy(pc=Q4(=e5|36m3kq1>xL()sj*&<@I)_}amckiF*X7ndo}ng zRFX05acHNwi32M=k8sUWnEQ$+-Dj@(;Y;x*kACjcZ4%pgc4o+LOop#p6<40#uHI9< z7z;^0EjZ!oU$@cNfO)aAlq@ox>npv?;Cmzu)vDwaeogOSr==u1*Sh}Q3g}{l-VVpp z?VEG5z)6Qfx^g#F^&fR7mJO>R%9I4F98}=2PGed838o&SIGtF=>ru56ajeD3W4l=R z`4izwsj2v`;)oadd}rmS`%EGjA75FWSU@i{X)DUAEASGBH5#~*r5PIc2no1X+FoW? 
zCrzr6u1VVor;#lUo)5aHx0&_DW@{Y^_DLk$eq>Gl04LR2=}(1r$vPas;<#L;1@L0D zIu07UcpGKTP5g@lSv1|O;ItUS++VSjEw{1bbSRf8b80kF{&=;wo}LF$)zUVi)Yn1m z;c(!;(W*3RD13WSt5v`3bfCrsjeR_PX-cA%xlXyG`9{(KXValo4pG?J@62q$qP`SO z?p{W`s&w(ZYSK!en7){2t6{4$Ha5%p#&V&1Wo?DdX3BVb?zl0l7tChq=~ZPxkNI)=eruPv9Y>cUdVEo9XX>qTXABt-|7IoZ3Vhc05X52|@~n3($4;pXm@ zwtM%#j6#O6gnCP?j(WadZY0Y>rU;_yS4Z(B>PG}6L*z2z%j|1SV2+if6`8+GSELJm zt&v)vPg6Fmme;5!?Z7-64!Fmhw{7-9kASobD=vmzIPSzl^jJ8`U7ziF>= z-((E7Ep3n^GPxlt*ZRCkM<~%O;eA$#%BErb(7u_L@M`Gl>;}5+<3Dbpyh;2ktuMgZ9wfD$vssuHjYxqH_konflP*Ob_QNy=st26`7Gq;L2mg0wk)q z1L@m-i0gEJBp=}XQkg$ZU>A$#Rl=-aCGwK{OoiFdOLw9z*hS!D3M^~b+W0NU5|)+55+>^jMgp9xzN3;TZ``eC>paVHNxo_BLzcQI zEN7kc*BxtACXuCxYSl~WsGrXHttHMY(Br_yiKa*=K(_zn-i(_fPdq$l%Xb!YfJK>< zx)gU#?x<)r>b<;E@6VERT#m#f9I;;q*vtR46D|?Dh#Tt@ed`QRPuiUafH<;~{Vx7< zV#$E%j??+-^t(#F@wP9sOPb~E1Ly|R=#X7sHkL~IH(P1fG|)A2t~X}dZopusqrO^6 zRYLgUs$fMCQuwrMhkUx4drZ4re{Ey7mUyTjhp$$|so^wmJzaFpFiPg(W>JVp-?`Tf z+X&LQBLu`6Al8y0v^3ynY20=yaqv2u`q|D!5INLP zTt&9YMgH?tez{THGyL4~#{Sv#dsDLE);#9iEk>3je)E%rmJKic3T-vlu}hmaJhdKJ zp1W)>_9aKlHju@3hp%o0CH5LQ_0xJSisnu`O}WR7X+rqhH`JqQyyIL81iiO8tlF~N zhMn>+FogpLg{J^1YSxM4D?W7Sc$W5+zUs+DC^U~UvE&qsH>qlAjuySB1s)&4)zA-r z7ctggJtHD}Mw(r7!#3@pK^i{dEOfrASF1X8*smjWe{1oqUA;1_>MgG@g%?I731SFE=5X>kUfF zDC8%nzc06tD~cZ#6N!Z>Q8S{8)sETTCSX zv3f$sAqe4=3Dx~wQ^xzg5FMiX_*p!;;hB8AQN(!P8tvygxe zN50!NWZk+iiT{-XUvXu6E-DAR*aJ)O+I?DDXf-1pLb#H;B4UrFM@SF*C4b(n_9ms z`xNC($hQcuL#kCZ`p%4#QM+Vl5$#1UCd#v0w~>Q&h4L8nm~-Lny*uanlCfk<5TJ`2 zJ2%gJc_>{-;2EO}wfylhiYa0tA2-tfr3MO;<}qEKa#YBa)~Fft_B$&-Y4{lp)pT*) zc0w|FEU9PZT3n4IlDFbp`C{4Pr}VAjO1-=Ww==%gBnMwUaaCIoE4&ZO6Yp~4CuNzd zI9bTYvR9JTH6t6UTSL@7M(t887m?lTrVQ-b?~WF1dMD}HX6Kwo^=+KPOC=nbaxeVR z2~al!kW>zdX^YKSG1WU)INvDt0e(b8T@>_My{D)dtu>H*C(UzPxJkWKSByThu;&1K zqBVC1jHD>LdG3X_eUWp&V@B)v(phLY~-@uhq z;qpn|Sn~W1@&|d8)Y(v=B&4-Wj@(_7vWCOav9k7y|8Z9gA` z_HAmc+YglK($E^2n7f2m`TOlSuvre~Ih0DW+<%<{_?eEG2JEpe#`TZOrk+;-cs{ex zKK^;QH>5GGMGVBlVC#wHxnt$$17JjjiM%!tb!f(}^sQTrU6QZGlre{^&G@xFD>!~h z7ixJ;ywo*&_6J+3D|7dL1%$18{o15Glt^#g2JKEIN?1~1c=tIU=I@vM{gbB&-jPWW z-2s4fS@L{dCmbi_vM%!h*ZK~wDI>=4{dFYNvF_@4iu2)=RaJ77dVfltA>A7@I*5w? 
z@vn*P`43oRS0@DCXLgTKG#gFOr%b_(!)`Bwn1a?%sfyI4bYx2J2Y!7w6Wf!gWhvf#l+ zjv?|357YfzIy(SX#RVjI={#7NOh480_^nn&5I zuZ>p#;Pk{V*sYK16{eiqVALC7865RmG++$mUieT099n`J7_EQ(PS4oB+-m;zJ#-g* zbPQ}C&^pAc{$Ln}$st`5r>Ci^j*kK=JqU`f^&42mvczx2BS3gIS#J#IEf^aCBJNe{ z{o}tTqcqWN=JcFB1=Z3XVp|myq(~r%?2b9+ZvCn-YtL;%F~a-cO$dAdr1~4u>#)%F zQ)_lVcA;w;rL(uFYIfr=Lw*WqF1ptdIq{q(wK6;ttul!`R(GQRV0!tFA@K>8)TI>h zPWh7g;|Mt35cn+GgfA=s!lyDWS_I6}=7we|#-b;xObH^M0`e4pE)%~;^QXiuAn3rs z{HM|VPyeK40Z#>P)pjTUKNg;U{67EpE2Kq$0T!Wt`qvNo1lMH^7*n(2AjCrT0AS|~ zviN9?ax`}^&Eu)#_g{%zn_ z$6YL;$9Is9N=I;I3CIFhn~GyTLk5w65{jf|#IsvIdla^S zwO0%Bo|&wBNJJHqP3_sS`LuLXTQ3#}Hh0zNJV=^0pwW%fz{89H;msgzEjyZfCq-cO zM7hcRZBP`dbKj3jO-kHGVptD(3zt8C+XHATo38>kg7Bp~hE~Dis%lOQi!vsG7 zFVgJ&PqF()q~wnC0I}qFpUfq#ZY?7u>2=`BZ(%lV&OG$2gRG=9y+omEvdy5=75Y{Kf z=0r#38UkCd`O{S(-9$%f^Za*ZUI#q5d;e>y;Ct)RE_xpY671Fa=yT6Y;BBx6I7Vl_ zGT{5H$q_lqyGr4My?#6n&IL0_TU;ZI3mf$sxLnayYe1LT{Wn^K^kNCET^p;4W0h?3 zl#Kx2H=Yb^eP)UiJ5`NGMcV+z<_PvcFXuu_2@fP@PnUTY@FXqPIhp|ju5QNn9{Nj1 z9rU|Xqe@yJQ(cx7y7$@MTIgaGxm+O4DYutNfN1=e!H3{)sxKir5jaen#0jF_*RMNy zT>qmP#O4Sl5WEn_&xiRo%kA%#p}Dcb(cjhptBiaI2WfiW1~fI@w+Db6n`d=p`HoXs zKN&m7-&4lV2iTdtAGa{P+VNNw{E()iDz>buWmWldKzH}&{v4L~>8q34u?Ch_SR1}p zweazH3xGTu>($KA2`!*me|W@W8sB{5+hKOKV|tT+_q8Z#;32_k1T0V!MCeT9ZOt$EvAv4 z(#SmIycpilsp5aX&6#=@n#gUT8gvY1Po8@lBV@n{N5g{_!(-bab(4B2ie_|uC@1g$ z$u6Bi23gGgOg>A1n3v$+I~9BY5f2h1ZhoPm8C}_wr<7A`B7`7vohWp~2=R$wtK9bk zyQB!3Q|dv|s^5c^o_GY3pvw~x8u=eB7_o{V>8_=EX$t-|HhXRB=R;*1+J)uk^VK)z zt@MKmJX9&$deec7(##z|O;-G@c#?xeBlFtK);}(L{S(}tY z9e~k4N4Wv(C0;Cs4C1qQ7OS+f8(I@stNeF?2_njUC~(i(mNj6YvU&xdnF z@Qzc@dQ1e+eDvu)ePW8q`mIOBF}zx$6GgjI1o}IAnSqfyz3}-C(kuk*SPfk2k0p|j zE>OFXs=|yPp7VFf*R-tnVoE%++exoIqy+MrgXuWAoob{n}8#wTA9yhlBx&cyNMoGssb^?+?zR$6aW!?;-6{Gx7@NomwL^|&I(Kg(mQ8eVHXa$Re`feRv+FXAW3 zJ02ZBCR3&X?wKPaIq#7t&=~YxW(zhzSPx6oAg)Ghf{vscnkwGNh97~-hT3h{es@SL zd7D~AbXngzaoj~KoyaHVHtkr*bBy6H@>LO0D&j{`c}k*J`DzW%g{Mz0kaoUm*^#Cj zpboXyY;*-wPc+1b;AbQ6pMs_&Pd%r!-ti1@1H!EUX@*!u=J-+jEDFosm>PgbX!8S` zikBimRXoxfxDCE^+{Je+uXz>#X=KC93qyv4&NePBB!g+m*TqvElJ%o-?^sil2rvu@5AKN_Sv#Fijy1ww5YW&waRqnp{B^{+3_YV=}?39^h< z>)C$#8crdx*{lA#@u^(>&VA^2%h~qA3Gj|DalC65@Ww!>IegGu$M+V#;~%KO$C}-9 zj1V%NL&q&=)7j4b1u>QXQapI<`>~O_p){TAUBXS{gG--`b9m|EN}IB$SOZ~^>ZHU| zyVuaC0`vFMgjwyMhjyb|yFsYqNmRGSS?e?jN4p|PTztt7@%dgQ!Z}u~uI$VGvn*SL z!-vr$Ij+4J7EYD7y*!GRm-$7I)~Mz7fx|Qc1!LE+p&wIYP6|u-tdQx0*c&Ot{>zMv zdH)WC$UKt1p56o~EDX|9AtdNmZFKtEG8cTT;eCT)@hW+AtQ(uI78W%NCjBYNq$kba z_@*Yj63fItnf53^@mB?BMc2|kxIyYuvP=Z+68QnM8@<~WfG#y6WvgT^s#(rWMl5>h zZEiqUOK-B=(iyWrHos(w1!p#+h|MEM-RmokSy#yu(gWW?CKgt%UxNMr$({vvGJ3i{ z+Fsm@)`x{KqiPDls*hoD4Lvi!eYo9=~0VX_N{bGXXQ*rW|lP ztBzNB!@BUQut?*$i;?G~dcOnMOPbBk=A6d8GTkLz8G0Lr{u@Y_9$z618PNEQ>CVWH z;;ZbK3)(ksqD`?}|InNQVAiuq1whW->0&Y-#Y{qR0dndOP6Bl-hG$avQ-B^5Deh18bV7XcahH3ODk(-%3EN~xI~|!Fmksm!&L6b7f~yuNjPa95 zp}K_j7>v+#6vJsY>HLj`VLjljwovF;m`~Mu)$6jAj&bKXpJlOn{_uS?-6gg8yrA%N zTi#|t=Z}qo*npV2y>^mUULP+f_gK#PSo?mf8pPu@G7s*+bZa;SKmD2p?~pE9-cs8> zLeOchYdIpjdccGVDkqCZjxho4v`6K{1i;-rZ0QjX!q-$4m1QLBH`Uij*0Z26vP&~F zEJ7e}k7Mfu_Ttg|gkw9M!+w}RA^QtNI@8q;{nBD|4a_G5b*Nk>kL$+s<`hI(=r;Xf z4;XXG)Kt&)b6k-7|Ec`A4QIB+ugH6{4zSaLIanlPNIg<*mis*`;`@b5PKq&i^imMS|~O#PL#D9w!OCZ_8=%gQ=&KpD?eO`sK8N*`y!{u5mS zw#(|VfbRYfSCDnSv(H`ZZK_c&c$5n@UH8=Rr&&q)n?I*B6*;XV+4oE~FjKs^nZdoq z)Rz{4D-v4%1W;D(V63=pcQNH1ph4bgvueB3dc=xuX4HqS4zqVrcoRQnE4%LMBah#X zF7w9kQ)TWr@%(!b4PdVxQNcelpu6Q=0Ncsz=45RYRF=EDT!%yj4h2T|!C&0hq^s$j zld&#K?CZYnNqe>UcM+wNV@FJ0A12j^i$msMvL;TeaV5o1jGn&P8e&})qD*3{|DhOP z7&mGM%QbU78}ATNC017D>Mc2scy7&g&ez@YEW`(vZQ^wTE{9=zhE3)s5ApQt>n4^h 
z1Jg&}x%g28ohj}2KAVnG0lo(H=(_-0p;6{siwavI_mQ1rqqLn)0|dc&!WoQ7W1zd)Ub;>#fX6)18az1=ghdM`8!o!lGjQUS+F(cXU5BN{ytV6 zu~#Q^q9KBKg(PG)GQ0;407f;rP#Wg%GYi`cayYlU*s3zoyN@PtaYv2srwX}Rm3MJu zo^UYf@yzxdQ$ zj}_ZrLuzQ0l_cq^4ZlWl56YAv$^0f*$rDGh{G$AJDsS3vc?Rrk z;ZR>{G&>gHg-bz6I4|eR_mN)DoAxmxkSsey<~>Zz#_ zSxFJJkK2Im)5lgx6&-?UL`5vEUK(^;5W!OLhYnxUFQ?IVr?4MxtIt;BG_lS)>u5hV zhrb~=xw4LD`^nmn#2)3}CX{pCz9`jE3^$Orj8Rry+i$<$^wc)i>-lwvzt4KLkx^Ma z@78ef34LE6nyz{&egI^%eW!l7>$NnfpmDqHJHqQqass z$IxpqYT+brFcp;my(H7-zYz(wKLYraZ_&4AQ|1yQAs+N zL|3W|>o}TKAvbZQ$3@)`;PMvlW@Lrb^BqJzygL=nJW8iYhH9l`QCr4Vn%gJCM!T7> zQ}^e3(nay<8r;L437OKuZUO}XeEf!Dw$c!Go$iSo*BPKcbca32%NnQYEZJld(Fkq( zN2>O}#kT)#gkK1Q^X}%}#sO*5J627IV}+kjxp8k7q&cViQ1cB9VFkfv8QU%}w@5hG zzf;#+^iw7z)N=p);B`i=m^Ul|zfPSCm%Qpqo$xfi-=iJ#J-h)FV?I3od77!k{8g*% zc~Y=$zGm@fwjb5)W>SvE7|gQCP2fa|-mR_Xg2RfJCs29V1qX8i|BjIrTc3e8W*Z~A zo^(dkI4xv_pQY*;P`V?IuR#v&2(JMw8<}9)kiWx+@BrNOHbV0qQDAzaDzuY&tfsP* zL({=b4w#wHtO|**5#>VBXm=4=1tI4EURYkOs0S8uaeP^3h*kGFzF=Iq7#Lww$;p8y zsLmrDp!I45G}*fEU|+V%8*!_uT9dMBJ^yQb&*Ibv3b8x3UR)4!HXc`&{LuPssWM`s zX8SuI{vQsqa7y#H7ED1VcQXie>&H01P=6m7#E5kZ_z9*O?I&)#e@}gQHzvh@4p@77 z+>5qMgN)6M=I#1nn7K4@2Pn|zO|7p%BZR$*zfak{3M2%)0i5Cs<&vno{oAXg_0n3y z@onO1nqm8s{Ahm|(#T845Z?{ano6{3oT!J-ZV0&VxX)=_$aQCh2Ph@y&2;#RGF|}1 z)b~c}x~1YydXx{{jo)BUGMJ`y9)Qz*3nCg#elm--I8x^a=b(a{hXpY zzgL2=M86nh)FZd`yb_kb?+H$iJSACE;vx7(Mlw^TW-ny_&fWjJQ0MPu`USEIWb}$q z4!l!6H@4WP7tbI#?D(kpOl^nFW|30;5`#n4RJa;P;dz%*M@WRVT0+hbiC zN=)I%sF^*{O!PIs`9-jivdu#^LsusJ&rS>&p+yRLTh7ad6YHUzeS~@p`KD+?v}Wc+ zQWl#|aPyn5TdjK_WziaRDJlP?uDGV7`5SGLlW>SZJUw4u(-su8H8dbkrc~GY>LV4q zlTz~rPV{B7&)#3|b(mZu?G0G_Dee%}+XiJtUCAaB?7X{(`!vQdkN#BArXCCD-x0~H zcuO9RZ(xzgMbn0o=x)?TQ(zW!m)6PBk2~QlA?fs2Ntu z-I4oi>8(9x-Ui9!#)tXXUI7Y(rbLB9$aTTMTGL*nzX2$&dd6eX?jn6*#@E1j8C0Kp z9zIb${!XcWMKEAQ+dlIeNYP}t z06C&0w7b-UZ~c9cRutHQ1gVoUdGKkLM+zGo+?x8k{HLhOVx&!D=MXFp=Qs>}Y0E?) 
zaklMS-cUN6j;1Bcx8V2ETY$nR^-}7Yy7)7*(TXQ4$ZQUDx-3Z2tIN*^)D9r3ftsW5 zBcq&n8RPa7;qiR+g>>fe%fop(cr|?2*|F3ii)1?pO}Cn!uB6z$I6Cqx{&g!@`F6K(yehyvEt=}UK=lmImLB5h+|Ra%icZwzm{VJJK&ICS%)(G zlJ>LEu0#fbn7uYCpPZ-IbrA9G7eYCk%V5Vb?u^u!x~vFQi#Z8-x&*@91tP`RRkh zf!xk+3s(Q4%BhgvZ($i4?(*ta@+(t0)sNAQeo`<#dTF!R2<-XG2rvaQ&7NE}0`O`X>mDa^oYft4-*A_NcC6 z_!HJ0h5_{yzHH``Xq|eGvJD0Oe4yjyyIe*SK)vNBc(L!zG=* zseA=;(T`<}&XMLqg7TcKz%7Bn<3H|VuJtDJmh8$&@5a`+m>o;A)MWNp@QuMEqGGs7 z2rMm@U8kmIN8ihOmhe~JJ1DpY$&3pPIdE|!gy2NK+i@QRLwFKkzg{pvPcSzD^>lGL zeePw2q{_qUa7IrZxiS(Py|m&3(0@{Ub->%W7gpFu{Zi!ukMpR69k*Z%%x;%SCzcze z4cr}P#5_E$Ol*t6$*RM#k}>Rrm^V6rDN%~4B+C!jRZfhJra&vgQ09dfV~J`X3=8Z0 zj{ScNlK!V!DhG(2IG^-7gBFf<)}1YJmkHHHOa~l{FL(toHi0A>!1^v+p%F>hl9|3YvRy4dSaU-W3j0L7AW%_-W6MjMWZ<88CX%P zcNwRT4dl!CLxU9L#LVz2-WcZ!ZT==?yjPa*shxbS@v?UOvq4hIR)^I|2tE{w0LTdd1?I6482Y8tZeH$I4w`MW>qbd+Qp;)*$UxET#zqE*YsC1j>x#xDx z>M=E$JQ$%gN+ck{4QaNAp2rq5Nh~vW)FJz$*`yfz+wKT3yH95+r5G`@F;^{XWDqTD z;4kF!X+Q-|=RZg=xtiN8`-7rDw}Ut*&fwb$dc8P*a_Wzfm0|@26+*7kueH2}$@J|a zvvP{UEi@!6cZMaI%v7Ht*x)7g-NehES1O)b)C(2FjKOgp>Z}!sW7^BqthN;_m(pxV zZN3`)-j#I3FTR+TQM!RBtcS-$=7vY5^);_(s4|6YW`wY8v`O1(2~FlR!5WDKHSULA zVNAyP!^LE`<`xx16<0vtFgv4kxg}$_Wf?BC(w{4oP%6o&Bjk0sRR=!WOtjY|t41x( z_a6dQlh9V*BcRF_U{QqIc*Yg~DAtKX9?LWgBvZc0QtAd@AeMs*D%3P0N1jp_-3{v& zFJAv5;(dKN3k*`9Dj5q*rp$X{R2IZ!uM+$};psg6&$19)RBV#pFVWSfabC7TARb-e!8OIBr6K_PmW>-n1H@;})13?{vMAJ0}lxoATvlzT7|KfWxfr zrrL2O5|{P?hyCtyX|H);cDLkmROz+QXMa9TeAgzO$HBAAZ?4p<++SZ_GQYmB|M1nH zqIKuGJ??-m_7X`0-XmKMny=H!c9!fHx3;``^ZvhY=FfFc>;x{LG@f#odurCx?V8`C zI&~J#yLx@){EyySt+qz$m+*Ww5IY*WcP(%d*{|{glhO0FMSZTD)?T_WqX0P6V!1zN zC+P0cJHU%DW3~60C3g#5`myNWq39c>b(wc2?EA&7U$%N`|Hf}N^J?dabF`WL1CDsN zZd{l*xAfqumowhA&P&u=V0Wqh#Dgp5S$Z3Q1Hz%FrioAAZCU)RWoPp<5n#ktEiI^M z!c`{0+7{rY9*q-0>pPwt)Q4n52hB!c_BdCmU45`W{?+ZZ8;?mX{bvcB{S8rmFnzY|ck!CtSWsBO1Zu=j zP~8eF*VzR&wV2(y=Bj8n;{b5l`6+PL^!!0N?wTK|*Y5&v_lqc)b~xF!Av=4G$7J|A zJv^<{4JHB(5<5(;d=d&d2JAeYoeR8arq{a!c%j`cUh_K}pjI~=_#kNUbV~3Y;C$pA zi{#_-^?N|K1WUE6D-f5c9TIK;n`koIq!jI!vH-6w<0%KO-kSDH(|RZH*31oW(j^tn zf%@4q8d%otdZh)rSJ?F2PvEky2XS8(iJS8II}lX@3N#$p036(~$T4-QLy14)3Q%28 zE0VE)0iNK((LZfW1jb*%jjR^p+N-F`hoXQnaDz$Ck+?ZElq&VG8$;rQ4OcmS!WPh> z$l~|M7Eq6m)uxHK{vk>NZtDdus!Cif_=%agfJJslh9j`q$X&tGiee&BvBe_|94$N? zru>ARxN&@BLoYM|ohsz z|NEc2pJ&fnd+oKpwO74gPPC@FA|CcjY$PNkJY^+0EhHrT9V8^wKumN*iFTJEJ`xgk zovo~_rn0Ooou;dkm94!c5|UDMYC49dwjNn9)Nh|YJWu4xsA8GL9;H9=%z%?YY2;x5_yJF!tQ(~fSVip&Gf+-`oWm)A>5H!^i~lO=*V(b zN2}Yi1i>uF)MbN)PmhnCG5nysu4hQt@*-c-yVMkK(7pi;YnVuorr-5CW#q8uZna^S zy-h?RqRHOcvdzud>Sb5ZE;eWA(Zi%f4rX-`NwPpv5{k;p?Nd`YPh{DxB|1Z62y-Ly z)X*>)lWnFp6JSz^JGBT@*f2R~dg8YJCczq+%(c7Upo+C(kVsGJ`tc!e@vM{Ch%G0x z=lMIWm|khBnpYO%ff@!hY}W70odu@z={u4ARn6VYG@e#@YwK3F91g|fe-PTuga#a*0I90Q333cWn@_6j;MmU(qw z?$pDLWN6HwDR78$f|maQ8;RL_#(E=!s)7Sa%&<$us7B>{p3Sq4a!N zKSzj~Q8bD+kO)7&r@}}>;ZZ`~e~nVx^|BiIc`!pY-Vw@nJ~0&%Mi-GYZhi37nnW{h zWvIa#jW?QwDH0fkaSfF`k`5E&3q3~>^8Oo0BC`bsfX-_G1N(D#fe0J&+X%VD=lZA) zVKMo{Kd63wJ}Ag5(EjbS$1#m384S;--{XFTUT8{Oj(zxx|W}@HI*{0(S14d{4ngQA7Z9mQ&!VA7cj?SFWyMG z2%_#=cYZ0Nl1<5j*575hf$i+)9O<&_+~k7Vtl|9(OtXRyjRp0`p8DKq-P7Oe-83ree6U%3Vq4>>h?hYtPvg|`jZ4FaF|U&dI;0H?z0E9!yuGz)W7Iie$? 
z_o7+5uf|#=cMDt7T1&)C+BMsiA9CRt=tez?jB3C(g;)rQT77Z2RfbjLPT;7@*B?rX|b7ixmI3 z&64aLPm7npk}4IfC&MrK;qvgPj>eA8j`y8!J5_>~eofO2(T(R1u0_*NB${PdG*4Cc zIQJ@z+Z0T^EJQDigrET|&`R3X+i^cAuoY&w^igac zZG%^C>o2Uy6w_7HfjmL2`YpvRG_KT!rmex9`BrZ_vqC8QD~8l|)V(yEpC*iU3ZPwQ zKd!xZZ1)B)%CG#pURf9O69}02&3h|@)qMjjTB>Ji7uw&g1g?ay7)fzRy_4FO(g?uw zPY$5;SG=dX|9IDM2fRmpXu5;dddwMmO8PPVVmEd8)%RM%9X9JnnnBo?S1Ie0Gzh4w|hMD!q^l)_NMlMEFVL{{a6}VzWo}bZAa=O}W z6|&W_*}hk-*Sg%;gYSU=D*$Fd?VL}I9k8n37J{{*zv*%|an^8F80$}ZtpBQd&djB2 zBbT{YwC9S=lx8}x5FaC39P2X=EV5Ss?IVZf3hd!JU3Q)iFjh$vhHGb2Nwq^ zH`>lrZg`WHlHMkvCod;?F%f+CE=noJ#nJuj+LiVGX?kcHWJ_=BVY?az=q`5m_mzRO ze~DpJb9ZSuIZqvK>r2}}sj&|qjxpj?U zlrSdMq2=@4W;^}^X_-)!@0ZQ2i>XhSpH2=E1mSi<<@*N1h6WAz`svM;!1i_c{S|6U zU<-yYp>UDIjo>&-6%+IUk`eE4D|Xv*`>=W&FsqrbOUJ&qywzHI?sp0M3~duybXuvq zwWF^()&HXQ&UQi$&~&pv&?ueqaO$8knO0emU;uOKaU^j#oIj~~FpYLqR#$#X2p9kA zHR(lq;@@dEUs?A3LWR?d#7o6%c&Qn17KAjU9VX0XIf*QZ z+?MrJv9?jQeRUFBwr}6|FRSo-1Dju@X&PTW>u|f}xjVV?>w>lVee*qm@tmYC?=I8& z)A^t7n9cN@DYk$BAe=nS+!U!lf%*?ww@zyOiu0LL&<@IRhFr;m-Rn-cz@-Qlu>s+P~_k zBhtTDBI5qr=AS8QQUnqP;)xh>d*`G4M{WF_eANHQqXr`0A-&O-RaQo%+CW!JOGh^w zCwJFg-AzORma~$c8xj&J^WPg;S&QiuL4Vfvy{@~inyMJk$$`hr!pYo{$J@dAFC8Qa zZ!tvH!P4D~&fCG>(M`--lHngU#1Q$v#k>r3|ES__C&{3zrb#F3`(Hgdy8UZd zhyn8ceZvdj;p6>J+K8qSe@n$QZM`k+_2g_F5Oap;LrO&SmBc^l|37d3tH=M=Qun`F z3WxyyucrU))&Fez!OhZD*2w|Ur@PdDt=GT${qHaT)lh=>@6i9-RQz+E|51vVXen$7 z-v3-QDeNs;Wk) zYe`D`_Nl*vG`iM8CrT#l+mpr@t+XO}tW?dn_fh5a$}(X|l733W3|i50meEJtN=1aX z@^>(4XsMmyc+2>R_H;$@X9wg({IQAB2&P+W$a3HR#iYtnF+!4Fp=r$(87WV z2~qy%L)>$e&c~~&y7T*eji8vc!)LtSZ#)AsEz_b`X664~qkoW7pbcS=TstVM%6$=- zdF+^RO4uo7d5qP0CHw#B4l`t(kuN!_r3b@E6#L(P6lO>u=5vZIJqaQ=JS^()|Lwc~ zIig=T44Oq4&jDWktEF#w{`V0gYVutX5dyppI)Rb}|H~BoixfG7rbVJPh{3#)zM$~m zRtqL1H$jWJ3})C#fQo+@{P*8hEQP|bqyFGk&iefSne>15h#)147K3|!^iFO>kjedj zRx@p-JKajwV3CGTfP86Ssa5`#wKp5FFvGktGMd1f^{q4m$Ni8s!73lM+FvL*mxjD; zeD+IiTz>Oz`?-I=-}F5p+azjChB$|l()Rzg@@%qeU+pTDAJ0HROq}J_GUrL@;3BRc zCs_Ylk-ax-RY4DSuFD>?mb0_Qe#aIoKD)zn|?2A~ya30nLAn zYKjt>wDj?IJcy-kgkOb!*3iuZ8{PH^(f5(E+fJsnI;nwuP43~;em9*g&*C`iC1S@W{5C5&g- z_}Z|iLl_8&+u{wnRY^Pk5oFf`QMeV9T`MigefxO7D-Dw?4&-iL`z$$oXjm;u;9`{* zaD7yze(!=WzIUc>e?$Gk^2V3WfYf9_dVd4 z+{o14owQjyOWv%7p6>}8ajJK@P3g$|iXVg_7TBWwY9@AhQjngH9g`~P{%lZP{5WMh zU^mB$g|o8R{Lt0Db-#F!Io*&yv7_8$9U%t)-9A_rZ?`U|G&F=({V%{4fs#31T7NhP zVI8pctzW1Fs*VVvBXfPk5{O-t3~Pr5TnmWv^YqiK?MgrSoVTq$@EFWmO9stbrmM~9 z0^>(`Ol88>`u{@Z7KGAC^`srPvK4|sqeQOA)wJY_1@wv&JR}T}e3N2r}yZg_9<*@w)s|3}nyb_?E3`<^`SP_^Ikp|MW12*xAWfN$&m zLzVA`TEv90xQzf5p{T8#(-bR!^8rQ>lTg3C$_O?3iBXy)s;S)LE!+>WM_3FTTehQ8 z?9Bl*en&Os9m{UxlJj1*Gx~O^jw2VzdG}27rlEMXayOMuh$)ij;tkkW8C>y$<~*!8 zwjC!GCWvu#mLy+e9@P%>3FoT)LnQoLuV%~#cO8n5+)Lx-^)>o9(hoSxZ z_J-}?qp{Dneg4Pddf@qhOqz~*$2y*qlJUcV&fS*#zh{zkXb^N{0rhh+UXvR$zY&L@ z7`;?LtaBuwAD*r3gSe*dxp+JOwVUTNWR6e&_`yHy$=KG#$*_P2m@}` zBUusq(i)u^(y8)YF8^Kn#h5s+K(QIwc>rwLYdThZW*fMvr=$-c0tiRMRI0I{m^_}R zCxZen3-yTRyl=O8gM1D*z}wtT$~>L-=W&;l)6N|p^a>Kl;4GOUd6`Yd&V(_l&!csB zSl*$2sxs!4FkfPv@di-95peJg26amHU;-KVsiy7$i#3}sWNo|06zp?3p~#vz@H2{4 zDA#RFbP%!Nxf~Nol#b2xUCk+H@q^)H#u=}D6O?r%PSP3zcq`d46J3B>S)tC5aAgo29ZO|Wi-o9?sRZGG1%@a0}X)@k{ zg+0xiqHm&xW}kt9K*OjOdo)wGk3y!=SCPO+j`#O~*MAS0B=MGVn3x3D3B+3ZZaysj zxA9qw&eq`&5D8k(W1cJ(a9Gi#{=0(Sh{)}GERD1|kMHsS2W%=|6yi>y_9=?liT2tbI1>5G`C7J3Fa_rI5r5R8-< z&=gNo%>y9g#n`beL8K2%3Z>-T%3;6U-c#%{yl zUXec@krEqV%4Uf;GYPA=V_w~ClAZK_lJFf8$%H`Nzyvb)5UVDe*;{;P24WI8EA)>z zuI!3EoaiD+^=?o2OJ>0JeDYtJ=eG0dt`iGidF~Pwx}GG0lDKK3abL`AzxE5A!x9B> z5%dHLVnxHcE6wp-U}wC4DeCn;s%3hDF+qf3wR)81v(U^8($UM+IX;iY&}mNnJF9UB z(fEwFAE)H_7&>mSy{UiGiBV_A;IX3>t6K#(iR2Kw#gyA99#+Vt;pZNsgz!l(MmN 
z*RwjKO-&Jc@2=PM@;&j@g=B5Zz9C&su=yJN^9pu_!0Wm^W*w6N51MSO2Az{tgiL{j z!)FYxRqHhb#f|-YjR%5k8DD2B%m@4AqcKV(vIOp0W>om8s~FW?B&MgMsmPpoAEmG zh!mEey@`qYIl~XZNLb@2cE8&3_>jl;WIIo9$^|~{78ROPsE$tbIlZvZ9zu`6;jT}7A;3rxk~)gh zbxV?WBZq<&H3|YZi3t7198~dpWOLWR3Od(FHis+)vG`U*4{%lN= zT>EP^0NvOV97c&`KHQ9uFRf^T{z~FoVoC=cwBw2B4)kLxs;4r_qKQEdhbDR^SHmWc z{CA@l;b@ab5s#Lbuj7n?@q#S*M7{=j&H@iz#b081T5SR;vfc~Tbz|Z7%0PawTsBk% z9NX39fu!Iz-6m}I3+8AvO3ph0!;yuOIlu%_{+x#W+!l};m+=hYr8sMzzpLk1=j~1w zuO32RzF74%Uu}7?Qg7H!GhpHM?!u|Pq(GHbg)pAJr{CdkyvmKB{|K$_!Jp42pvqw7 zV|lnNkcF%ZG;N5>wdQ}BwZd=3p(zm|i~hi)Cj_yzW+QZ4+(W+WA~>6oIIoU6FCYl`VUmIc(-7TGLF*Ai3oXt#n+n5()1wuWqx2Zc zkpN&VLjtNxdBYyf)(kuNWKMZm!V50|vy6oo?_Qe{_Ab5iu6)NUn@-~f*r_qvk-jR` z*lrsa?nCIMH*eG<=KL>4Nusz}5ZdIbz$9uf*5caYiHko%SLY{qEd&esLF9OS+~anl zoHxRF`bmc=7Y(b$dR!8w%F}*18MpQ}o8R1Vu~wGQ+|4!2I>u+WM|^+3Bri7{Yq0iF zl>W(-(kv^R#0+6vQ1g}wd(}8^m30K(Gcj;2;~l4S0i|~S^QZ`#u-2rT;fr&aZyF4_ zAyg?AyUG?T<5xoH`P4FW$5N-iP(=r`?HG1GciSjYnpgJq3GhSR#Z;*IWel?IkED5e z4dD~wC(Vq9$v*_s&{9^_ z;al5%9>@e~XZq*DDo(B82kvBKc&SS7 z#D*W9f4zEo7?|+50Iz+DEQ|`C=;vNr(b7KKECEUK@ z&&P@W9qM<&^Gs%9n5zsHfngm7H=7{Z!G2PJ7n8|wJ?HzJ8a}($@+pS(2w*6>dhgO3 z#-#wb8vOE&_sH?%AcVKiWuL5{V19ziucwQh3$ht-CnWe%d0-2%mxXJ3CtI z>wEwpJ;xn{kPOOk-@B)n6;ua9(D1RmYGpIc+g4@DmpvC9diLhkj7$y+0}aa;f5m>oXnJ!;{8aV-)nNG#+Zluqp{eBAn>!jvno93Y&@R-~ zk_Q8nKA2et;J!%wHv06l_`9+pJkP; z9Q)Rn{c7eb;erGBn02wqog`%4P@^oL5TRp^4X2C>*dmjT9 zF*6KLFG+7dvK+L^a@e6|6vqhts~paYityA9Fi4s$tc{(FuGqzB__rN5s_f)=83(cZ zCiK(c3Y7|wlWiVHdPSU7v%ZyM6mC{}`25`BvoIalhVDp-3LA|TS_4_F(!M597oorK z?S){I>%AwM38fP;(~QJW=%kB%x7*ZlRS~w^g22|=X9k$>xHdYZnO|S^$Vsad_mk%*s_}|uJ_jz;ChPRMWOMQ{85{LUP069fY|{D$bN7rPiB-N ziJF^zqcC#wYNT@znwX*T1o_Hq{Na}6RQ>9@sk8mLa4z=_5f*{Ae~IP!Z0DmOBaV^J zcB&p_-&cU~ptr_-&HROy3ipRJIhmY0_-tpc83xwQSwHP~htCSxDh>^*E$R@OY~YZ2 z_Z@zF?R+~=f=8YnPAqTJYY^OrCvg~+WADc~_RHRmkW$Encx!oENgG*Y;t%$oDc zEcqMAe~ac^Lv&MZ2HkVF)553e~V{y*ukA9AsM)_ z+~jhubqZ^o1}75cxj>%of}VI;KfB?5tNh~>7@Lu0;qkc!2dDt|S`lv+h*HZSPp6axHN<4 z45*^M%V*_G=eQYs^|W7*7N(w>c+#{2`xKg(%2MrI>?LH<@^yqFMMgSavJx)0_lrnt zuq{vx@!PE2Xn!?fekF4rD9DLe!zsJYG5qoQGX5rwd#iEPvx#1%c+$}0NM^Kj*^sFaqyT>+d)?IxG@^b^ZiBM9o`t=t2E;9t;eY_7X2H` zRRdoAo<)X1HTnlH+!s5GjV9%7C$fv4y+n!@eWui{N=^z$hfN}&3SoYlY9AIe)!)mJ z7szd7G!^&uwY~a;4wx5yLy#Llqob8=K8|Tl-^|qn5x>xa_h!6+QGECze9kkVWpFOf zLnQZwS;x-R@5ftR)X$xf`6N-Ju>m(iyL%RMY$yf(++;G`WxlMcuriRG%!A(FW<)Ae3bq+~tH#@$pYG?E z@MSyxhfnDLa@73uA>^GRo!~%Lmr1K8A;KutXc+I}U25BhdrBPZ5rRWtZQ{ z_pk&*+6-cI-;=}7gf_9S4<^hMe%gQ@zekt~XoYSgG)--FC?^_%5GayrxMC9NR>aGlE;$;PSxC)t=iqr6D?_P!gQyPULK3DHid(z z_c!m9$w6MCO3Lw07yKm_9aws|wM7cQ|89wIjU)XHZVP8(NiW&AErBCiI{xj_Pxlvh z3MEf9wZ#z!0X>n({y{n)HF~^jRxK?x@Y>H^ixq5Y_DCQ$rDs=Rd~wG|qS~mp_r{;H zUr_55GZjmy9`!a=AOfxsEF!tK1ieZ?tJW={z>I>%mIPcR7bcu`y`Wd(UXHy5U>UmL$M$gsrt zac0LIUtQT`M^^CfUi-jr0Y2Bs?u>~Nybw%}zNSAl&4D8nhjt5lO$&QF_adUq(Eb=> z-_aGzRN%bd5AogW@J~~JA-oU%x`&Rre9L37GJG{2+NmW23d(-FyQjx}Tuoh%243l^ zoUh4KPpr?RygKCkdayG&>S8sgyPVsTu%~b&!3@^_A}$#LbRHh3m6ddVYdSW(-SZ-6 zitL5*ThqZQ^LulV2;06LbOiWj{RLm%?t(Fo+_oQM!3}0)WdaV1yT!+0K^?^ohKn`T zsa>!)w0Cp;N3N?t?d9wW#yP)zFD?!xN+b6ypI`)<4$(}ldkF?{bOV3S#^wroO3swr zIaNgC=k3}R#yaYoEUtZZj7V#e-iIIw>`k>hR)1P>xa(s%l-k==Eg2NayDnC`U+`+Z z)z+x~=2`pmyhnXz46$CCR|o};_u8(#S* za_kGMo-kmm?Q8O)sa0cI!(PESOY($%^UYjc350pbfym|vAnagRYcxFf0TatB6lhK0 z`B;;KJ=ehgA>YdEwm-mgr@`oCGQq9;MC!;8lHeN##K?31od&H>XC=j z*@~)5C@=l*O;=&Rlo(8nE#|*Vuy~)!P4>ml9jjNZx96AARF~5oeI0ZBfhoS2e*E?8 zGwjE8Jx-&`fy#(np3nJkcC?rD&I9z3OzfqCu~|I)mTkKmqa!MpA_alGS{f(f;*tR}qe1|um1}GKA z!JOYCuID{+kwF4SQJ}~#+8UCA1mlkIlnT{|eLy;O`I%0jcxQvH6^n>h)d0chQlu zv-sSP*71x4%rY6#*XvhT&`}IQ*W)|y1+wQFo~5DCTvB>7Bm8&s=S9ChUqj$@j;B;d 
z*NzV_L^eJ&a-Pw+ajQ8OjvVc1ESh!u#U3Awlm8x4)CDU|lzd<>^V!JP$ghB0;ICNS z2bx#@{GTV4a~VQ(bO*ihu3$*6JN?&(ZmL1@Qg3|jz}V` z90E|!lxQ*FgYu`0BtK~Vxg$cO&rEl@Gx97VfYiN$5-Q_iCL{c7Od05Fzlm|GN^su?G?gFK*ZAMfq^$u4AwntX(5#%t(mVeq|B@BIvOfU007RZ; z&CWse>?}WG4H@`Fp0vhPvR}y9TG^i8#YyWr6km+3U?poUC*~T{(6+3+y^oQYrd)MT zSo*Snz*3q5wHF+wCoV^+8nhlBNNMnb1blv;YeR6Ru*JLsn4NT7fat-!J-Z>0FegOE zN?L^tOE`JZ!?U%%=FfpqEi!Lkk0#L_r<8wT71pD1J99UuX}aisFH(>rFy8m#b2@t0 z4we(AkZ@t;gq47?rTs8@R%^=3>s_F69^DmDl{d>YEhI3`r&M4jf=}VxIFZ}mdn;hs z589qNn=~CP4whUO&h28$iuTdAc6_E_OrAVp?MXl4wBMtq^;sjykvtK!*`mIP!6THe z7C+}k5V+(dJ`U+Iefyifv>eYWcSJYuaRk^M0GTf6H>|F%orq}4v|gNB-MJ{BalZPa zQ$jw-zxbfIoA{vg^Z-w3ICh@|8TpJ@b~riU7>|wg1f*hER0b!6KazFHvKuF=W<)NE zZw5?CF^&k@2ne5^j9#KCvy4ycjPXi6*?{G$!k7KxivMt)gJ?`Z0;w ziaaeuq~xdnd2-&!)nlCEO$QN#F|MoqH!M7Nd? zDIKkg8g35j$b2NFykc0UdSG?)BY|kgm1O|Gp8P&}Cp?N%XJqGRF%l6eO0qL}Uu;qU zD(T$K0pwoOIN6ewA2aX(Jt^G^7qND|_u?M>d(Joac4LhDd%b$@Ki9C^M&2})LOaH$ z|ExDvKp*F`vbC*$J7ldq&lXy8?g{INa;Y_9J=V>!U6c}bV7;#sa@br*2(Q4cA1zKU z;8j#Kl1DIS^4LuLF#n^)f{Ii4r;|~HJGW(4JL=udE2_2@-~YZ6h`>zEs$lIs~dn0X6Q!%9$pM)ngmtWEx zXR?WAi-E&oiW}T*N3}i%>f&!`-{!KP2~zS-<}Nh1#!{6KP?6=MDnb7@*XT`s^yC*}5c~ zezK=8N9TqhZ1Q%zdTR-f1+g`PHal_q5sRr>m1QIqDL+uKLO3w%&8}<3-?ZL*g9ZtL} z7w=nti=jG2v+Of$g*ERpWG)YZN?puyy*@b$go+psjOX$iy+3MetAerhj;)10ABw?9 z{%a!ss`mEz$Z#%C;B}ZX8BX0@DUsH=Bu|RZh(oL&mn=Aqd}hDfmP^T#TM#?P~hyS9qH`_HH-wDQ3S!1+-5R< z+nBR9$w{VqBtq~R8R=Jmz-5=2FcdUt@=iSp5iJ6cSXwQMDtVv=+h4tue)el-fh1P{4?3TsPaPNJS~CRCq49eho!{IDOsaa z9P1GcFey+*m(#19fIAixDKZ0_idSK|)G6xMWwr+nO>WR!Ef0NW-Q?KVR4wi|G)eeY zD-dWAXEh$>m5%TtSt)!n%`w?Do-4U&#ja$j7fd6~?FY}62Obx~`1$@w?)X-z`5)Fm zlZEm;KVL85Jb=uo8Xrg#q(b@XjS7ks&VBAv{Q1^}u3x1I^*d)Pt_eA0kB@--24c_D zGEdbE=E1B=Wo7l_(*PV2-;m;Suk733 zG3bTuYrN0lwbRvr{y!iGtl{if(Gp4RJ>*xapj9EqvJHT6oYV1#ass@b#@k~_sm$HU zl)W@*)>U#3>>5v=Y-nSq_2zd_BeJ0=T$FXCpQ4j|hpM#GxHb1Lhq?4GhSe2rAGdG2 zO1+&-?O#e!%{+wvZckBuE0GnFLkC2@M781_AZqK=wCUlBdrHe#ocE9*0eCg7n!Oz9 zKSObZX`3&^xeKIIjPxJ9gq4mXJlyrX*8!PQp^v{S3(Bzi4&Ush$MlY?0rzjbFPqY= zQ$?FV!%IUOqz!jc;0EDdWh~#vkKf&-o#N$9Re{?DkG^N{G#1}Jq1KiZ+|2ahA;qlR zBob@Bpi;)B)j+>o(#9c=qLb{b`nJ8itb~1gF6DN>vfW$U!Ms&)_=GiiWuU4p-1ekT zV*IM7vZCJ*@})kK-Q_zgC*1Rr@uOqMF@x>5tP41|`<$`$+OFdk?Ey4)=-qCsQ|K?c z=MeG{0{EXG`1>BVX~A&Yh|czY!%5R@ifd(dQL(HkgQdo+b~W*d#*RgSSCZcd0Ob?R zB}Ii2>^tWj4+(!Vg^mR%1RYqG{{JI1@Qi$7>@;px(G{U=&qf;#-Q0a{Wc79p)=I(tSpBc562=|nXj6=YL5);Ev{%td_?ov}zj zzfAUT5t*FqZw&GX-}ni^h0nqsa*u0mzJ=HufnMC6ln2Po=mtzchF3)q7WSF~B{#z^ zqKo`^L52`#pT$adD;|k%OjnombY$=MhI237ba#<>2i$-}iPAr)z&pv==7*}TQ*X*i z6s^K|U|qG z9%I9PlVfj8^7T?)y>`awYpNk!0P~^O_zwH1f+uj9YBIvm-QzBXdYb~sXESa*hvPH= z4E6jof;;u@Kt+v_tFSk;Gtz!GXa_y>J|BQ=I3}L;_1@QsMF;UcvGB;+`UlCLnS>8` zgaI#;JrcSSQ`-=sKC)b~hoI^H#O77#Enn1mgla*7_B;R8RaH_zllnXbfqB@@S*;ty z$*gU}?TKZ%x2Jnf1S1sCAzE4If^_va0%E+!hu;#nZC`FgtvzbHncRmyLiy+`t@SkX z4%`Pt;k4kl6y@0$*q-upv$=1pmJZn8%j?sk#yum*T+H$QLFTv|o!0DsTZ3_g+s)AqALk9XViV@{H2- zRBEXA@Wa7a%L7%bN!!sCbq6&kr-AqQxIvHKJr5&d)aE*HZWDBA=>|YqCsa7L*2`WI z(h#R3lN1JW+Er7MAZwHQFG6=Q|`} zXF&yyBOvh@h~QoocOD-ou(LfOy0Ko#-^4&(7=uPAA>Y2ncN=TW(n0xMCfo?-X6dK# zu+e+sB|q{E6d1sMOVHYp-az7T;D_5LHrp*mviKTP^2eVmgp@fYrNeIbPJDe+$ad2| zYtn-2=pr)PlKeXj^#g*T@_4viFW(8abjK~n7q{B13V8qsoaKkqW8H6ZGZrl#pYI4{ zc4l4fKN^&k_3-+Byz78F5YJl%aC4}n9**Eqbn53cK@%A5z=#n0(J(yrZlOxY&bhu; zPyla==#oW}wG}n!+)cVMmuy2TGUG713%of8=22Y@gf`3?`?QfdHd~viOI}S&;9d;A zQTcPVT?u&%EbsTugy2bhWkQf*xrbWErJ)Y4(q7c>edL3tqD%L%!PC0epJJxNtv z7bRhP*_`qFcUL*V=FO@<6`R%1C|;F6;cXoI2=n;=V%SFhXRtZB5Q;pyk5z^>Eb;?S zmEQ$iT%30b6*3g{F10V70=;izPLNcMn7Q}riLwpK~Gp>>YON3K%)NGqIB z*049q2t3NF_51ckl3*q2NK}uLAK*#?o$J6k2n+8c>^~4wEIXun=)U9)Ji}3O?Vi9z zS8}SI`8pz!$3HDdTx|4)Z)_0_%1_(G!Gk#RGTf=;s^ 
zkZpx?oDcU*L6Uy`^F4*zd|g&SS?Vw)>uZLn!7qw5nf<&B4!)?Z8T4)$mBKcctKk&E zlRRI&L-jktD|<=YWDU6pR+}_6;L0C>KlrF|S?RmYsLZdHXLo<^4ZvE!i$bd+Ks_ao zYck&`++ne|os#Q6cmU~m&&xp#(4GWxLY)^2PKVY}ga!Q$#&d6&c+fS zM9vuLPz6){gkBss#dY#G!l7$>(F+Vp9du{dc?tN^s^8Ze zA(#SX7&OISjCbk&T3}6hh+3Q$;1tgiHSKkx{QBBmfo|1+oGrDSJZT(AB0&nOk5FM^r+@m-vUpu{z1Y?u98r9q8a3hy(*L z-dNMQf2#_$R^(iK&$7dBwMofmM2mj+fHv9qA8=Rj`XVOM1{x|4DR`C7xHszHY?&7f@x;3wTwJEIoHh)$yEKC0O6&;cO zOYa+wWHTbSvp8GJZ}D~vfh#X#O%UExtxkDLLmgp7ANo%PSprnui~)wy#6S{akl6=B zdnAV(?@!ZnQ86ZGwKAl^qr6;kbH%6sQk)U*c9tlEr6??FMF1;o_Xh6 zXYc?%#q7@E%FkQ%3BXK2>%DAnwrZthMjN8KN*Z> zV6De;S#aa+`JwJ=tfBx6c@M}Tjk!m$+8tF|h5A47XCW>;U?tnHy~ZJ*A_6POt29gC zA!T2!8ypiTJR$ETN${hv2j(`$a#6xS#j@#}EVUeD7wBlU%{#Sjka4Q}fo!?BqZWN| zsc4M0_m)3oE#`6ju(N_2sM`0H5Fr8`nJ@PzGxD}zEi zA6~vq^#}6=H?(BrMhFUVO2d!;190NZRGWI;^bLl;)GYJ$LE1cav^+e%_4w0l3pNnh z$d6c>b)~2ti9vp0TTevvmG>T&;3&(tgLFzW#30=@wBI%EckgGv@B2Ob_qW#f!{riZ zG50-l-RE_l$8|(M7*;Q!b?>>ptg#>jOZ%kXG`$? zn*nC+tiG}sBlu&FUVJ}WFFg3ZJjBo|)8sEnj-4nKCh%fkjtW+-%ZTf~2n{&>quR4jZc!|+g9m3fth|1AC-pnM%u=ECuv+fNVXqF<~ z{6w{1rX#D~!gg4uF%+}Sv+isw%-y)o)8nSa*!>1cXYQ0eVG?lE{(#}N8>lG>AG!2Y zdi}o`8_PMRv3ArJ#zlG7-cji_v*Bk(Oz$RHq*37VENFvXhoLM!R4VMq`#7}B#QMNO zWsrp+(MUl-XwuU}7|qD)TtVZs{Iy&+DRxEIa$-CqTOI63`*WF{rgZG6MJU>53d}l1 z`~{Y?3_9N5i6%r=MtEN!PX(TO9Xl}W1mk-`NVGig%0}87y-Ih+4NmyGu%E;$$01D@ z_476nYLEbgQwPdMmWjCVpi)m?l>W zrRt3=NQNy^_e0e*5n2;03sR;)A=cx_2AL!erVW8yU4|cve;lqXdl%zkiecC^`BGKf zrr4wH-INSTGP%c2%T;ojbM$fl+c!J|#~9qtM!F5{bN= zZXX;VD`8NF%kO^WkBH|q+858=9@MgM2FO!gs)Ip>6L${R{becPxe{D`A@`}cEvP~r zB_b=#X`M4%n=W|gE4k=WwBL|;QpUB@*8bMC*C&H&|5uLJ;o=GbS^q{o&c=C7E?|@o z=BibC8h45vPqdNSwYureQoCtG13LXMMrifr?=WfwENonvJ5W9Fr5UmqcJbWtm@5=r z*#e>vvlza#vXC=GW$J6SxsAqjt_#4Lh;8CL%Sb#q7FV|rbuK$G+u~gS1Of39W*gn) zveJQMhdJ-SefxVwDJl@6H>mAW4n-cvXrjI%A z6`y=;XYO<3eIhk=1RxB!CB6;RrSuA+MO0uJ)AzN4<9*rQM@#=p(D|1P7vl1-nd?kw zdi}b|FO#9^L)a!nN25MUcU7;zR{J(5MmJFqu>&m-ym?9ukhDqcM@5*9b5rx|$+ep~ z(X?6U=zhQsG(5B%m(KQnmd=Q4?S~s&1MAc$yQ77%jT(<2vS{S8zZ&FV}qVWEB6LLxTv^R)=y-)!(HZnSALhV7HE9fJ@81?s@Ll%fdq@~L%ds$~&hg7D-_KlD0qej8CL!f}KEKu?lpKfjdT(Bwv`t}630X0IwOLLZU+^%tjX90+ z2H=j}a9;K4*vGW-<(*L3vnL~G>(d$@bC==z)dnv)#@?ME6g5^yDq*=cO%p$BBNh}X zPPd8RKK#&Y-q;(jatOE+`aJOx3NGo*Bv%ag1gEOCobjA|U!{GvO}xL+qZfMF_U_cq zjwh(Bhs=)3!Bwb2a;_#njx&>(Qe3DVxEUn||pXzcT=W>bnqwPerdhJ>W zS#O&l&?bFi7>rL0!Fr~{_uaJ2_cnpBG(G{h#GSk4 zZD+Um4_O1939O#`!?~LXTrMvROy*IvhJO=@2~(cI~ZbCg_@;kXIZs4#Y}HL>M! 
zk-k=<6m0{&i3FpO%2m57RyNJfWN^9hW7`wgZ0K)lL{L(g3o%*UL;cr-U6;jgQSj$x z0ZB`fpIzHNXx65^C#7_zw9nZ*&*-Ow8F}^$Pz~N~pu+7Ls#eXG>}^>7WaYWH?_f9o zLts{=S0OK*J9cgh+Da^UmPt;hQX59>@Ri-Qv*X~)FPoi@jC0;6G+OofZ#O?B;xZ#V z9up@;y@do(jD_m$ro7U)fLJ6#9X8@;u@6M;qI?PjJPFSosd^y#)LwH~wvDOBa%bF% zmL}|9^MUrsa5yc7`>`I_$;6f-W%ldy^SE1lgW?8a^l_%msK|TjHYu6NA0*JU2?yhc z&mOdQl{;d2Mc85AaGq_-x+=7BTT4g;&Pc<%i$CO)GbLBcxwtuZj85)1*@W|fY6a?u z;cF(M(m+KDlATwBBo1D*qd)(xC-GDZOYDJcS2|XRkL*e1Jn{+>bBJN|(|$J6X|ZWB zzV!n#i`S0xENK(Uq{Er!*pnj4oCeVyMRhJJ1zJUNeQnrqF~wVIQfkBxX+!BS!Jy5$ zMT(-_xpOHZia@%Sa6HjH=T|ywhgk27fg#pb!j%p`in{2`>IUhZOm8fA^IyJ}UM#Bz z=uPlAI5p(Wtq?C@;~r1AEUw$;fw+g~j$j9uG>YFPc4$CZrI)gBzgP*TU5_rss@};| z{B2^AUr<|PRnow(f*O<|JnHBkn4D?fhmvw9^uBK50LmPSrG>7Qc!A{b*e4q8Gdx~F z19Rg^%*Y}Bt;f@6+v(ZcHMAplZPY(Dwjy+j`uS|q*{qVCTP3Vw`1{|kOnQ26j69^o zJ?BbM8^_Ons}{>;y$LQpWld>{$f0M7!NA9AH#!aQpD{-Usm|vFi@a?u(aDXkqDN{rX zHP?zwAo}t_(!`kKHlCVX92;-rNeAch!bNkcRAL8^2cKt~nN@4xB3;BIFqJt$I8MiPfFAVen(_fx6kzPPqUXe$OfFcT9&(82CzUS`Zp*CPpj-mgKA`x1w3wA zDb!x(6mZ8<5RRbkx@jHBeSU?ny5=$=*>*O1e@?&2g7Vk7FWs`cN~Wvh$KW}p_psU@ zk*s9n1_$uSHil(>Hj!jsO{(W+75HaToS}B?;_{-l?k(@f#wQtkwL^II0K!zVFx(zQ z5S_SSOZ5s8Kuy0jeHrhEE^D{b@iQ>53}i< zf`BarbuuJu+)F>`VW@#u?1rFz^p?-?A zmYsuZSjQpGoQK_(<3eOOmks{NQ>Fp>`IWDYdhEg|2OSB6oCphLQj+AJnB}k42uj>& zgM+P|jNlreSMx%UXW*7R=q7q&VV$3QT>{e(vTomZXZV1;AR4HI%Y@0+;kDO|2 zU#&WQmR&>Gcha0z^Rd6fvNDLw=ASf;5#52ARD!{Jder4NzQE9ej@}#XIWL+o9@_$1 ztObOJ8)H}S)=JCtEvg&?tL(XgLJf>Oev<_v3;0C2}tgtO@Q=!9Q?lJVd;h=c`rLc4c{tXff`A08|TY6%ju;PUH!JAu8 zuQSmG>2dBSo&zI@C8O5VRpZm(L@F8C@&6!@A^sPr0jsKVZ!Y!REPCTERe$Rzv4SR9 zf(bEkuJWhb55SDUwV!M7-s57g^T3~uj74epeftgK02yZXPg7Z{QA1&FnD?kn88{Jz za=o}yXsj;}EZirZcqE#kCXBsRt=oIYqMN&7tTkIz)BB#{KuIQBu?NTIW_-wNy8wwn z`)#;EGwC1e%$S_M(i1$&NkWr~<3~e*-kOoqBPvse&$urH7$(@HsYsL)KB;~vf~h>p zxn3TXm zDtQB47Y4q%3K04`d7eX?i+VlOAOaU+K#nw0j@Bri^2W$1dWw*QThwP#nyA`YNoh-5 zuBjXxuF;+c@8so`cj~^)2rE5P@B9(*J?(^u7jb|DDiz$a&ixLPcZwGfKBi^u^PjR* zSS=F(E0sSo}3Tz{6S{wFGtC|ELc|1qwF4b<%EJW(}eh=pW(!jps zF3IidEqwrZc2TWjd6a@bMh@eTyhYMRB(Eto^e3NR>3j^cXcHgK>k7b( zcNsGG4Wxdj0L@kWtp@N|4?8#VwGWro0IfK)SUXFpN%@HzZnh|J+jv_?eqw&`?Uw>V zOSQS_Dqv9PL1W!os^0{6X~tB{bfr-(>Ib#tt2mz}i>FX>r9BWZhO#YupizL^cQICkg4?mM0(I~tc2{7Ju@h2pFK(G-4S z_bu$IRypmm^~{~jHfsf0MRBep+?zjH)VFJ*A3`t)8yP#tFJ(v2g~R#-8gmMsYZQ4Q zA?q`Rx!G@}GdOO91=n~^Hh{X;b!K`ByXNQ=@CdALsn3Ox74OQz{=`wtHL`0q!s7x9mLUcEq7NTy1aR zoLul$*Trih^B#MCOuP6v4mS2Ew@J63{%ZW}-(12o<-a#~RV3R}F3|nLdrBv(qs3LZ z&SKNEy+MAue_>8)hUu%GHmH!$Vf_26)G+Iv(8j&07fbj`gT zTCQTNLvcJ6A&volLn57+cE9v@B`v=Xk)69pCYz*f;i&nWDu54B$;r4_pcd;q%Q?t? 
zly^WBX_PMMN-&3Wy~bP+?6U}Br<8N*Hv1HmoJwM3;&fHm0pa2Xts8ga8cUq;XD}i{#8P+k=)TGJl%*Q zK&jV8WWZdXVEIByo3PU!Awpy%ZEQP!U`=(NoHlR>bLOkhyUmRP=0ZI-*N1$#$iFZ^ z5-KL8PZ~XS&x9*@Lv6U4m+75Bq> zCvh}27G38jw!m4Y*e@15 zhqxGJVN1+22To5GGSmlLj@@c=E*2CL%TYxi&EH>NPGpp9_4CwWO3Trzkb%-&TzM5=`sRbxV&_{ii?rLI^kqYjs-PUR|%>$Z%Iqfe3`}SVHyTHn}lHylq zj^GHcS^6aq@_}f_|MdMh6~PBTcpbrBInS=1FUtUpv8~bO>dJT~${AP9(OSCK=1-B) z8$~g&7wDppj?0ny5M(ykIq20brYZ*jlwr^hkPJZGA4qqQ;D$n5t0WdRARgiL6<$ z-fsk|QDA{)$gb_%eJTzAwaPW}I&D;@B8=pg#Axbgp3N*$XN4?bx3zVh5o_&97 zLrqio?Go+OWi+Qux$0WbZ}D(|xwBxS2cVrykZpP z@Al)L-$MpZsxQOr!KsU*4|-&Yxr-T}`&_!lFFJCA@8!6G_a&b5o9gpS6fU#2a|B~Z zC*Ek1AJ>29ooDv zz|Gm4$nBgDfF54jM0JYZ8da~?kS7kI>+IrbADN^L2hkJF3-S8L(?Fst4;eT79wt8H zk+Y<)V2UI5+lVfrjQ`9ws4SjCp6rXXQPdcfsMiIOHoYZ$)N?%6XD3LnvyY24l=kXa zE_x|gK;du)f~o7V_3blmtdaI5%tX;>ATaY9ZVEeQg}R}fh%>EB{BzCp(sS3aS`@;g z;u~(aWr&-zgl?=&hzM=~5$6SMKten%=Khtw!kYtA8wJux1vd-VnEg6a)PX0N@e5Qn z%a^j<2m!Y48}Sz%p?XuXvpeww>X7MA)UNYBLa><-H|QbmfnTPri7xznK6ThzdJ37$ zdeF{uyIpszyS|0a3Cifu_~@!WN^rwUy?RruOj*8R9~#IG0IV42mTHRjwUxLU+?ixb z8Li9L=xQg^u)p&f7-J0p5>FxIyH?%p0LWv~Cwe!9hpSzLz_0CY6}agWS z2B6bo5z5mG(~|uS^m?uXs4xTxC+1{j-XXr{1|p=)i1dy_Uox(oBoQy25Is@q1a&h? ztmz5PzL~Mxg#CRIhZg^;>!I9Gzl=^@Kk=Vb_deV*xyflAZUnJ!&%kLl9HO;6j;U?e z{Xqxen@$&anWqfa!>B||l^GV}@6E_xTrWQN^W^Sh+-dcaII5!jc=RNIW4ZI1R%?_V1yotF~?)x-<~Te4$4(6*AK|;1{J2F zSBNsUGWvJ*7G?eH@lAx*lhR|}(<1|)*ZbT%APrTwfi=s~LYA~=1B7mU>quIj8KqF0wBeo7|pq zNSC<8MM|Anz`r_kGOauhptAeU-J~Kor6@|b?-}b;lhCnJ$H6!?&8w7(j7(;2Gd6L_cqhsO4ZVHSAT`|3`7uo$xp868($59$n4d7T3N?F z_QvVzYpRANqcmw{sn7V|8l`lc6LdO2n`#|wL#A^<)0bR6mRha-`!OYRWnn;^9XUP; z|M}kH6PI?zhf`Xc-`ud$G+RUnI!L-Gcw<;hV$i0KoPlwjok#9s2oWkeE|$E;VcL^D zroAV13f554kyEmch3V3Fd%CVUfzSQJYEUrwL`YXT>ukYzm+wyE9_3NA5ak)+8RK%7 zI{%?O=oE)<$dBDT5m$(5;(4JC7J?bNU92Plm7ItDd{$=McE_KQ3WMv|+;jdsJ4<*Z z*UNT9`DEqI6rAE%RA=^2Js-2e>%5=bdf&>zVWU^ICjs>OY&KB=%8HS z&a}Y#*Wfk7OLJ_PQr>IPr|Gskp-8Uan%LPh&i!J5Qx7e~7y$KF!ckoL%e#!rFFKQL z`!_a}Vqinn_VtCi8#af5hZc?f_E6g|)W&V1Vccn#ZnJbI6m|Vk+)k{!gPv)z>7qkD zL}@h=PD3#&5AhjoNoX&zciuk|fUkvqPV9V_dLC+3N7}uT9(I5(_?=gw<9NJqL(vMH z1APfpP+!tRYjSym-|JJPnGvNcr6%NNFDRumXwmH@Q!i6<>{B!O*!!8o?@##7%=Q4O zWt!YR3RpE}z3?1no4&k|$X{#vPjn!fe>Iue?Esxpm$S=p25upfzn;n)V217*V}6ah z^Ma24`OWLRMJTzak;XRkojcYxa@QaPZkfdaa0#+GTA4P_XEzy{X^U4=ZnY;Gy57|} z4E;mF} zAfB0=uzQX(L-_sR#zwAlm`2Udnd^2M&;48w1Ck#qrxp>Tkb+I2f*C8T(Kb!^_#}}F zY((A7*iy5&7Squ)A$|aOzVxKgN^2JxWHeP+e0gV@;7o#N(7pH^%KMRCmzcYn9e-P@ zc%8+FQY|%+O5AQ!0Mp0Ft5cATtZG#|$-b(AM8*2O;Dk#3f}3iiGff*!3`aLX{>U(g z2-#1ebRNB zcLdc$lJyFXb%vw{B7cLkb?=l@Y72H`U0>Gd^IKRrJoF%xMIU0#aJKzIj-?`n#Z=#bpd`~n%vQ? 
[binary patch data (base85-encoded literal block) omitted — not human-readable]
zg0!Md0aBV7mp+sHP??ma|5Z_@H+j_ds5GWV?J3;YiM^i%_?hQvm^V1 zxy%*PBv%rN?zJd(qnGO)79MweLo2rM$h`Md<%OH-n3y<2tMQ^=*nS?UfW|tf9lmoX zUWt0yLl!Q~o@SY1OVjqp+mzlcgg)tS2M=D$3QE2?JmO)*b!%;vn(=BrA8@3N`pEh< zk92I#+V6P=@n6rx*V$ferfE`V->T z0Cr>+wLlc7QvsoFUn2S0#I|=$o{U`FH0%{to3Th3KBHwRVtBP0M?n1XHnhQ7a(tfi z_RrxQ|7+FYlki4A2&H6F?a8(9YRwG9+R$|Md_SX(i;cEkf@^nDa`sYl*@^g>*K&2c zI^nYlz73sh2nTjQ#l|zjeOhp-vc~Wcqa~~vMI?|bStB~mJ4u&kZn9Y`v*FT-|b6DJ@@UzRxT}y zDQr{V!|E}uFQ(6u*Svb(yWc5@&dEK?=wPv;lGQ z`J%r+ERju@=H|`2nqiMbn&h5Q=gTsx2|ED21{_aOW|Ah|4IZp#`-OW_1$0O042_5y z77_gx-^+JY?gN{c4Dm?!=Rwrje+N?b^@hRPM1gkU(%ubFmPmXr_Y*KGxb11w+@a;b z7xjyiw~n;eUmWfWi)yj+$;3Udb2ECwQ(>Xz-ABQxCo_<4OMR`lHUa6&er=4u{!X~5 z|9jLzik+7J-}4eX1beSerrfcs8-O9sZECS29B^Q(k2RxVW4tauM>VW4Qlx0B$PNYC zc6>jpiJVAjLml#&q1KvhysK@vNeIM$2K<4i|93jgp1yuJBM!Op9oWP$$W3=kIsVg% zg6DxKT50q561(4KFhtTnNu?o{dYHlG>=qzAZB^lK(#Xu@B8hRDPMDNk<9(# zg#fr!=BUFiKS<9MHtl!r^W`kftH#EcO7&oxV>54M4?VOay*65T*nC`g^LvxpeM?vm z39u-KL)O#G+&Tj@t4HG_z)>v6@kj{Nxw+Dw+ZW~mQ(0VOvit3tesmMk3|2j2CM6^I zOPp9k(IwD%*1)&+x3;&&vm2OLkY8H*mr1{0Xe8e-r5;c8X?eRL9Kma{!jlV=qPalM z3g3|Db+9Wu&(@*fVk5Yk-|X)TK7v)&A1&`WEDi(KN1L%V;H}~IEy7XCzM}!>7p;%v z;PHbe4u+>9T=YCES?Upa;^XhZe{ZVXl&B*g`$c{jEFA5XPVOwp|JoQqoH2M2-ZXZR0lJ5)#YbgY(CkOImhw{HaspDf4|XT@T6Nj z!waLQMZop$)C1ql_Ttx8t|a2Y4WD0+ao^WZ(AJmnT~W6)B*7#WoVBHK(aMwD?V;>C z27Z_8QE{Uf{p3atHR#D+uY^&4yFIHHy)YD_p^!5BP4%*-FX2&#h21Y!gTCrM8n(7j ze28lAXs2b%$w1T3rcKh-i`|GuE`eqGMtXbvr{*8*@p1kA7%o1rIJi0U&Hm4%~}JmoW*`x ze~$Axte>SXR#6-4+4@*!L(ma}ro#j;m-(l>I>#$Y)oFL_naR%&+Mgva+6BKgc6o{= zt?Di_C~&1UvJ`?H9tL8a+fUCwbS`c+yks$$3{m^`Wh@5%+oykDRDvUY4xr8JtJ?Jy z<@#2;`XKJ9?gxh0hTHmAxlB%7V_PM!s8Ab1*$R)AyI-yZ21ji&iHON8Iz!Uevz?Fc z#1e1sln~!WK@gY5%vJ2wuSV5 z^eR}8y6`siUzf*+Et5vx${WJ}U=cDx2@^a6B5^~HqQT9tY*7!1jOmiL%B$3B63`0Pkhv&}gFs=H!vDkeLv2-!E*M8f2G54F#PW zCj2yIJ6OsVED&E)s#~rQ`pH+&IhjwtISl>bHyX7 zHY5Ns-D!b{f>H{~aB)^N3_Q%hvK~{W&4NU@+Oz-=HEg4cj{zv>ta! z+7&)* zuZ^n*DUiE{B$_~Xu9m^G1?BNRO}7dR$P^r+v|O6Lo0;dM_)!a)#bGs;^F|%QIpQV> zgRgOCL?~(14w;72x*I~S-R(*%Ck#j@$LBN~qV3E7QE!`HO$_S{17@3zY*(Ker+nWk zij5(g(rlZw@`}YdiSRzYLhgNX779mFK;MrkjztsFMcUkmC!S~4|yVVIzMOg4E zi=Yv(RKHQrM>fwDK?O-?u@9~F^F-*7DN+P^wp_~#y<*a?3~OB_TxcM)GE7ngmWFfF z7g@x2CwHWCMv;e2_LX;;=)v@8+fgbPKmn?;y!^OJiS2MNx0Nj$AKirc<$@h+e#&B5JoSL3yYqwX;jNXyk%M)O_~-z!Q5Xu`8xZvvm&dCvpeGb%O;1s4|e5~chqsV_=S zy%(Fj?|P0ob0eEIZlrX2%_R$-UOI)l#n5%T9OrCkRzGg{gz&FrL*j&z7@JEz0T;)? zf9(`x(75?s*-(>~lRbQw#9_EEU_qUWd$VbwuMhIC!?WwIwd!M4tfufjx+wxd&QU#K z^UqICGLzW=jU^RlBXkHJb=8#J^DopNNjkkGGZjUd~q4qWeu9m0{{s` zN^KSdx=nM2ryjLrLzR&N>unp#u@~jP`zZsOfOADJo``3)4wk3hB#DwE0L-E35x5pPk!y2 z65M`t%H8Q=x6!*Fq}+jmrGMeR<+K0W^y{_8Q$DI%Uv4!=%9%+;gk_pXW^o)J#DuOw zkvu<*&CcMz)1oGiApdUm`nae((cJ{!WwT8+VX4H&V?B58`mK15LWE>XTR|G4E%;0? zoP2WJ?PZRZB%=fMThqs@_tv?pooqr(*mQBRJPU1yW&)$$ zgIdWuiioHeB!mcN#1bV4`343b>lDYpK>VuD2xn?^^G~~1-W7pE77f(&7G4m7fvt0) zycm{;;=bojL#yW%z4tqS?oBnd({CbH!^LDI-w)j4f!0lp3ffXvVZsFuwb1a~x}eyN zRo;O!F6KLZ90m8F5%%WULD6baZ&BG^AL~Rs+jLMZisUf8{K>Hbs^B)>`34^!L?aF? 
zDaB__1g{CQh*pL>*T*MzX%nfErP}0kD(gTeYeH%s@cmQOa7*yoG`FHQVD*orV#;xs z$!v(3uUmh0Kr($BFZ$Bv(OrsS;c3R2RVB*Gc^i*!fTqHN)lOgyRzAQX|A$6b$n7Gl zZ?U$NZElKt_w#z{nQXG3!O_$q_^ zv{*+8>on%ew7IQXuK~|wHg4T>wfaBoy=7cf@3;4FgGi`!3y7q2*C0qs2+}PAA|)U( zz<@|egS3eba>mfI&I>wKoC(YCWO=ii7n>VzuRz z`}(Eo-<^SLdH2b~Jgqq!P{PgLwmhz61X@!Ls7{PN_P?*hwWV&`o?yi#B(xR z8i^f$R@kiWn((R7IhQAhS6JY^SiY?`ICd1b%J6qJKId-_Rqgq0@Gl(JYL2Tx*V zD{5-$IU>J00JCyhmd#maOZBZC%=P%kO+YId^ip1=?zqUaEXc5Ip>kio_4{z9PmnFq z@v8XWs#(0y>0C|Yg5$%--CnwYCg{vXn526Fs&%dmm`LGwReNY_<47~h*XWYdHsvwu zRGvpGihPopRnG}r^lfv(owYW&qi`fDSppNM;)BaPsRBCYiaAc8BUSup-DMndI+thr z)#$e6sUddWzQZ%cH)*Rxj(cEE?9{+8xz>jx$HkPZTg7e?bf1ofg!~EK+iU5JZdoeu zv1`pLqY6FV3Il`Wc-V)ACK#1O{a#^{-{dGI!p)Sd;mv89XHU$CC~9x1mB(aVS~^8- z`((F`$douRMKB7K!FGt=MAQ1sgvDP-vvDtv$<7XpbUL2! z5NbVdD0B+G7HIbL+T&ZF;uu+ptIi&Dm9_`OoIdg$ttK<~p?~T06|Hq=P|7I~DQ(7i z<8HfIdm+Tu?Bh4Fof-xOTGB@^d_-q_pACJ962Ij-^FuL{Im>rcW_*@Usqwb(MiJsw zQqNTV?4kC4$;mHx0_~rzEaxL?mB<V;Afe%?UkG4a z4iR~wZeNIhrh+QmrAQx7`4dN9!*g`>iYgFQX!*_RHbH*z&}=K?6S7;zj9y}?4$#UE zRsq4HF9*Ln>oM@l$TbAlD930gl*!wUqT@s+*OieJ;!|CC49{P0%2*r-V#uIBXb#l9 z^4=QKamx1ew=&xPNfhJh!0UFoCPO0dLO4g2DKpSkxc&Ab5p}W8xl70gEu_lgoXF?f4fo|b4sOdSVcT31UPlc# z2z;EEi+nU4ZP{7EPNC5dC&6F9q zaMY@09M*JH;*HqCGX{;?M(%kFk+>&|vJPqw5T8ywnD?N&jc$aWoMCuXL+*fsDdob8 zju2$ANs~|sTI)4o#3^@jHRd1Zfn{VW2DSuu(Zzk6vO{M9>FFLBoao_Y#N3MEvrcWJGog zRh{Y`P0N<)HzwFI#RCUbdExPdd;dqPBop;*U&ZSsVd)m#>IRC8*7WLt<0f;hO*O)KSI)OkVL8GtzXeg zFIj(wtN^CK5bRP9@{n8LIaI{>AUUNp^_Gm{j7fEs zl|)D#ewuTgB4`YX4m3lLat?$x>aY|bzS-Q2RU^1e-ko-X9*k^`kN-D@u_p# z*7Ch`Jc}7^-aa`Z_{&f1tAXSuBFi|Na^a3b63AaIvD=a7L6Jo)2SEk&)K$kRm;$F4 zPUzn2gut~R2mPO2lpAZu@i*BK&M|3Fi))RU;gAO4Z^Ma7@ATg_sS0XP&SVp=X#B!; zKs(lxwCR-&RCpYXqP~9%R)1b5ZA8T7)8v--FY&DHr|%7oQN!vupr9R>ft@GKLEFS8 zlso9qVg9fIXoF?a9SG6okNPglorZ`_bIE|U*5Q}!S?Idj1!_s|*F@bfqzWQa7t|>y z7m+=syBnRiIwn>yPUe)3eHETuas0aKjjGNz(JbcdRe6J-M(^NRbFb{#ppiZ%hc039 zsC$>3w1)J)Q#&tAHw>&69`p7Env$7R=d@u8Qm;eYya5c?KIz@Lj^=8kBcB6Q3l#ck4y}^jN}Fd_2oCvgLl_@%9kLN$QaA{t4hSVDQtDeI5?FsCyR$gjb;Ilqv@eSIz;PDD!wJ6;t$O?!9RokH(+;uRJJR*kL<($6%&vnTd=HN)43PlVu! zd4stl-+QRib)9iuN|M;861T23+-t6OQ)S5)w@Y-dG3@uWULe0J&^;IV@u$f;e#-4;q_JjkG-Kc}NK&u9>%%yl(y zbN*3&nS<;2kF8^7x++l0?9(_9bc@pEn>s%Oy!<5wa%+IABUMap6A&_ctuF&ZZ4ZCQ zY46fJH#%v}qlaZJDZpE#SZY3P>fy4+c&pj=alQ!Xz1yoE;t9_X+z+I@Ma&4}<}r0yepUSxy_Dtr|EqBcx}crT2fAv+Ga_1gL>uh%EQa-lRlp+ z^f+~tb8wu#Tz@6hiPNYnWB)w9$U9Tepg>ToRrA_pzkP3Tg}m7Acl0L!(*cXU&O{RR zP;GRBQbhm$_einamJ~q)FgO+p#mq9N-G}tDV%@wyQAHHP{wDdmyl!a|LJ1|e%u}tV z%LItPbQt?&zjVqr1*nz82J5>FVeK!v&qgiPhG>wM{jYdgW3Eqn)E542MHtv(xj4z{ zTUG~8aA@QD#HceDl%<7aI9n5S6GG3Z2`IPWWQH1*qmJC47e&@&`q zh3Pktb&ap%9gB35wX|^y@29=>L!#dqtom$BRa+(Fv5k)rjwrqW-}3oRPdYI;8!V56 zulLK#QFN*yfB2x)CPP{{)5Sx(9wyGoSKhGG&u^)Em%nHo@~&JOL8v(zJ{_JXEu!vD ztE>YSVtJ2a*^#`}h<3=d=SK22xL zs(2@HW_i#;kS3`NYR0{>u7Nu{X4u+=#uT59dcTrtJ) z&TW4#uEns0%?00&=3F?>>Q@Adl7GaCOE;)sQaYj}lT6O*`zyS-X8#DeMt_KEvhQyK z+%+xvDj(Asy2vN6alvTeTO}KAj;Ve1I|T52e0f*NFrxDFC-niXo|b5BzP^$E>S>Cs z{jLi6k~k#wO3(Wcr_POuke$-%kVK8MO73|C-3>N~8^T~`>8QJ|zcYIEUxMZvmdEnL zIVp(zl8liI25-=Hzgurdu@#&!(tA0OYmyh(`Wdx&-1Q#3P%;rr-npg-|YNtiMpQRGl|J ze7}dIzNIw3oV(@Wdr{T&LqEX#cw!&h+6L! 
znLDw9S=}(#*#0(vpRt9oWSmc%9k-pKGd>No4grTlJ9c|Grg14^xA~F6(Our2)XB2a z(U;-!XA}yj98EQAYG=Y){2)wj54RdRoUUrPx0*%3qxA;Sq zIKr_GxoQO$qCT&_D0#dax6_YV^C~@UK--sfzl@rM?{3=e{o@7Dc{Uro>2qU`bD@+V z1%mXQFG-95vt(d}hpk~UnS6821z=;r5JY)^Cy7bP}}+a6tP1`%Hq z3|xJ>O;YkDhMsH~))mxm?{*x*eRk`b#e?_7AFi7Eu7^Kkyr50f+j(2TL(N3r{`JXC zcl`BQr5I(EFj+D=`}PR@*8Hd#ZNt=!L)EYl1~k#cc&W0?#VZ_hGtwJp5BXPuz71Ju z-3%1(<>`NOJpdrOY{^+D-YqG9h$fNE<^ecy?FY;9#udj>VIIq4wJ?-J8^)UC!ShGI zC}imLIhVTFGxV)QR*`rdAYZ*-^w!o^-EO5P!%|{vO7u+r{9qJL*}fSm_nN;>3-P>lsNN5@1{%|lPpM~{l~WEmuc@Bo8jrvB zVW3G_GVa0)7?0bK1?Av1^d`!h;OP`5i^w`+)>+<6=-J==-JX7+;yH(1RZtCTjwNWp zfp@D1yP)$}<%iRUhb*P2D9kV?dFH))w+i+% z?hh;?*!e!q*4yxde77S??}=w*#=)Vb@C}}xky}JgSfOrG_Kws*i7)qe8XkO?&cfx7 z+`-aon8vuMn7&qa_(}bni0x%6)EsOJPTIqO`ldPEO>XP5@_RZcd8k>PzHM3vDvK9| zE!-FVN8vu4hDw4nYGf5Y{psX)A;73n#w~vfCiB`CK+~ic+f-R;KQ^zEkoB&?b_`~J%OCWP^IoTFKG-{Xb2?L1WWam_ zjUv(4|E{fA>Jz*hC3L2TFz76=@UJYlyZoGJ&b0=#o3_*H87gc72Gepv`gR9z2uVd;X5$hX&;R~9!$(q%<}OOkp}*sp_joXVfDzaWO>tO~AJS%!axI4n+w=FO zjhQHn32Bw84-3JS{`QLJ%LnmYqC{7cLew5$aCGMNEa?`$^>G=u=?j+=`y|-Ez%|LO2(s|J)J|u*g5Sr9%}=!d+Fi{JIQumn8}2ow4+vmX{|_&v*z! zHzitx*EQNN>Q^{BBFukeULt0bo9#8U4}E_ZlPJ67Og9ViGWQ#44wgR~*5ohX1V3IU zCcp9YCdqYyZU-s(S_Ufhof`wpL)f@>|*g29`YGl!fVfy-^R%4^-!)X1p80e>!mlC z7wy>2K4|ZGK$0j)@?>YJho=gNaz)`ll$$){)JpvwXl%Uv{EJKAQ98asJF{~R_;j+Z z&l4gXHwS`Gw%ei%&ZRgrp0UkGP#2slvKyr z1@n8Kd#tDawin<)uMHcGA@Y1u^~%AGms4MDH2EtrUya6m(6aa;v|ypi`7K1si|OgY zo=f6Z5i_iR#c9(;DNSWnw)zLpad6uBoz$$?{5n;k-o?k(+`)6SE=k!y=Y&3+r|eQ$ zjiING<`3EYdb|W->1}BmX=~wL+}9`-vu_^+jdP;mcE>m2yEqkSm33U}%T2L~9+tgf z%cKn=uFov76<+V#Rhy1Zqi1oOqRbuWC0}tgX-{lPJ(T!HKUMJfLnm`)#%tFLrR%fY zazBjD? zl!Y_WM1T_Z1bVk>Gt&H zA2w6@GK@Vu+l1=$f=)P?$#mZEQ+$foKsH)A{#+}&dju_g#(hUZ?d1!~{N3iDvO6o& zR`@dH9kE))%U0GWPk?(tKUbVHTvdphvz7$sZ2K3nA+0bxpATsGoSKLV44QL4kci{sZi;tL*!~mo zeT&qyxLONrANadOk!NZ#^>jK?ldd7}NNhxe*Ue#%x~2S{kYimJ`}T@ZSpG|!v9*Q; zw&u1`3uVRBo?%80-@4~$tlzV0g6JJe`E5j`de`IKV?QGAF9ZmyBKUr4w5D_x&8qO@HWy5r^Y}B@I&wYfR?J!QoWlRKth3vpJCRZ~6&lE>8byRHoKU=`V=b z%vq{&vf{ja?UcFz-K5AaFX%G|JV<6B0}^z0u?WrY%d;&I%qq$pqQ(%?>1{=d`ruK% z%aDl6-^r!{1$`5P)l3wl7fWtYBaXt#`uC>}lJa!%y}>JaQfIi%RRP9obvDc`SuZ3?S$nQbvX(9-CQ3wNjH2bt?HDg~Zc^tzz zCbHM4{v~?lnN$LvZka0CQFkEqtwlW&zb%obdkaT{Y*e5fd4VVIpuG4ac2-0ntZoQm zD|2XX1Q$aaoUS@TDi$Aj5`xa68xaZ%1FYQ9fg=4KpidXp7fxY%K2w6X=L@#gdoJU4 zf>ve)dNCgpihR(5hf&g-#iIWsP0SO6XIBE`1(2!_4QL8 zaH%M)4MKpi<8wD~+tqaB;gLIPSXmF5EmXbntblw~VuG8VnPjTFxy@)igbca?Vt&?zO~UlRY2`j7b>^wyZxqwFWhv z#dgrHfn{9r%Hz-5FVi^|wl#o0T(0yS-z9F9DHQBe-Ls*gdsOO`iU)QFuqm3Mu{ik< zL(NsUsDRQF*?=tuOLp-}^=~-TducA?p<&eR??E#KUJMmxD*Mt|F;fzKN`Q{ zZf3Ceh0>kuo}B=>F`Lu?e5>=pzBPeM&5NfV^PkiqjVG@6oU;C4dBV?(e$Qk5^W(;r z>cIZ}%_9lhPRm$FiySC>B1_Y#bWjB@djoG(Tw*CEy_^!wrJ@u3)V{~(I2h-3IF)^p z*Ap0~@P;bx?pE{H08vX7ABpQvZyh|ge&6-w0VFk!W#PsnHFdk!`~1V~SGAllJaU3L;X-X#C zG`B4NihNY3*|&S_!1q65L!egrUFjN@t7s?S-uLwc5wj$7|YNjh1;Ny{annoWu6z7xm4VWfLUqNYPS3QgdwfkT955lRXrZmev7Mnt z12L(oy+EOj)6e9yUI18sa+``hDoA=HX-HRZG?Y|x1 z=qgtV7C5VnnxxwMj_8`3m(fVvtcbqFjDiKOi|0wD?dZbF=Ho)wMq|i)A_VS=ytF?% z(l=ik=oN3Rj7ybunmqX+5KVG;**tMf8j#b|F}*s));#=oM(@|ac? 
zOwTg&_sZh$`l5#OJpi_%#yVO+JZAK8!MVuChK;wTXiw0Y+`lS+Jxm4dlK?O&ly!vYa3PC(|`E-(qs~>gcawtG;gFISvAIGe1(m$d-Ber#Zc!A zRaO+ekSrhvasVMipun$>5$ug8!?oR%yLTw8^FNsOMD>Vg6|rsVk>51vHx9Qp_PS*; z+!e^7k5m_xBI;=P+Xl`fX}{1-w4=&=>S$-S(bmHd)Kk#e_1$jrNFPO7nhAYndAkdD zQ`N}WJll@H%k8(LGRapAR+Q?SW4=w|kP3oEJ!~p( z2K7pPx>7s6;Hu7>ejowj!hf6jpf@Sz6SYB8CRPQM1pz7`^YgMvqI#uBGSC62>W|wT ze)kX7S!9!>Z5qptDtJb89xipdY$lx{{d@*B(~6Vm8oLyz6ElX{L%;yDmh|~&oii{# zC}VBnEmn6@g+%Tt&ziM)DxI4a&U_#2-^2vQ zx3-vbcUa?m7GXq%QzVS;v-`Uj@d`EL$<69~>G~%Skp6u41Eg=8rDp2>YwLpB{ zyWgO?B)rSmSabq)od~KqYP}tw;{DD4w$GonZ%pz~LWy1JCnMFfJTbd}%D*Vs5*59u z-=$~7_;=z%srhb%JI`~IsDYpasPjFYdR0FLwB$P#M&!K?p1^q!VI_r?`yW9dvdAbt zEJRupt7SrUC91<&q>Mf8zbh3O@MwFo!c86ICN1$Nz=Lk^STIEh(Rj{aYgVr@Ih+2W z{ttEGOwDGLCmFb$*7j01W9r9c9Y`Tt!##Hn>Bjaix^TTE+mOZCEocYVT@svAcOd)f=^8n` z&vzXuwYv7`C_;QSs+6D5(1ar}@I3NNRKdq+eId=+=>Tf_CP{^gxjxQRWr!>3`o#CI z#(8O7nI?}=5!N zjKzy3yx4O0!n<*u0;x#<=+RLwSt_lNhq@LfHDYxi$8^L+fB zOi5A~&<*q!Z{|8^m2vCMr8YAt%x;mNrmq5P!J!H~&ZXe1ci_Pnn~uUuH;9+hu10lV zg9nx1WZepiCiby>_f0^v@Yb`Mu=FeTbep&6!|pgdk>0)1)^9o{Ka~{m*r7&4RR0&_ zgO_c1neTruynfSWH1tEwWcLHPz_P#ok8oC5{h-jPuPgWZ5_(>eGuFHv#f`0OtAAo> zK2TCsuvd^*K-dApyQx_>sGe*y^_z0>I!kQHc=ONC^-OpB20;jnx`E;0p1zVDo2&Qm zi=HK=B3TbxX=^N()j&{1Yfni~jn0~Cb&H`!lSy=f=A7=#y@Zm8dVYBE_{jJ-p(H+o zvR1(x)3`iy5g)Ic-1Li9OW0!x*XKO2i)YeTWPE>}<>G8b*2c8#L)`D6qrS{+NzHslaX9ES>#7)M8Xd$ zhV~{_1Dyt`yczml5*ygVGv0K0ux{SPKaiZa3B|@n72e3otEO_|(huftGJ0p5JXY;i zzq_L`t>VGP5s+=4rRe(key(1J^Q4Hd8ZW(z7F*NE*jp_NBaq+hz{81|?X8UAA-ti{ zT2aKm0fSlYa6|Zm4HF7&T~w_?ZP7+y676Gi(*U(X3+F(}y9oc}K#W${*Qw90Dg3}G z3NIx&CrHLQ@3?a@XB`|(o2}&ySsOoNqzDRr@0nhfEF`c0sad@X_ZR{PcU_8s{ z{UdSj1m=<9EXozlATUNx5iXd&n{pvW8GF=BU+=rFj*P$fW`r))?!Ky>g@A3l^xq;# z5jguQ@$iH`^sQ&qM!Xz}LbYEr#f@_LR}Oav;R0NI>K`>8EK1s5R{;a|67_JM1*?b; z-l-C16ktdxONc&u1(qCU_qosiKztOpFIKBtAiH@cohS0c)eW)H)g|J5J5db93So@- zB!->|hf|+_ygXRSHcjvdZZyOJ8ZQR1Qo*IT8!uL_a9Drau5Z1<_i}^{79>c>r#2eW zRe5G5;yo*2pyS0tuao{%ltO*3X3pP!YB43zYuq{f?~uJ6#?bvNeH`0|0WU?^0n9@-qjLW_A9kz}XqcA?f9iO(WMs1)9eJn9 z`b=HG^@?Cd9=!2)#@G>T7eiRAy5)A|){XBs9g-Ii)Wtv0A#4OL6?o@RZ-+GwUJ8-{Z?s3g)tf2bn_^DOdOPHSVP6t-G_-<;VZ7a$1vR`!t(;HWc|(C+Z2(g-eXV_{WLnd>z_n zL21i|-L5B7l`Ax8C9Py&0FeWH0x|9h5|C8_mp$8)XA2CE^O~dR*A&>(QtJkSYHB1l z8XRRkxyN>N^+eyF{em+z(Qkl57fDy89r0;q25sjZt1`FhnP{y3wfJcKJ8j zJmTA-2=Xz5Lgn5`N??I$dO>4g>+_J0<-31boY#KJ%fftQ+>rc+p~a z`j5s#rvgys!>D{;=`aV$;3?Hw;?TGli?J`YyY2+JYj3HsKjxq={E^DO1Me}8?TN(h z)a2mZ(e=aZvnlt+qKR_o5ze>qVu@2(7$%Zeh|cQ?v#rr-!N$qBMbzQM44GTLIoG|@ zQV*$_M-Ioqan|tXgm?qyU1HtCdI0KVs$3PdVHxH#uoKE8T*T$ zI_vI{suk0XkaoZHS8yokp{}?*?O_ef^ zS(tEe2J)Jou=_bg7QTjUq2T!Tn(!kvu}b@pj8f=W8Pe`iekO0}?bd8G3`loaRLa(X zXiB>uY#wD25^8TMo+PADGsUm+!bmNX3o(rL>pyJ!c@lhIG-kiE7bkJ3&Vfd8n3(qlymrmNne>d6?N3Z(BBt8&ID8hQECzb16-$a#SR`x6IeEv-

  • FDeP%+~q?K)48t>`0@0E&(I*zS{vVkKTKx znEvQ3XlM44ICizIUg>;LyHA0^?)pn~h*N7BzJNK1={N@0xWAWj8l1^}?~nGrU;*go zN%gc>Jgi>a8z$VnK7uL}1a`d+j{TO^pPgFQQ$KL{KT2j(E;YKw1d$#F&_|@Dg*rYC za&@f)-)5RQNYON7<=V?X#)R4(w$W^VzH31_Lv+rlXIcgHow@?};8Z^eg5;X^I@iH4 zOw_B>_`o{#I;AE2+->le6)XB~+8v4^ehYbw;L-d$ubx}IxMzFZ<0@ik84kI1;EQML zMr?#QB~Ey*{wD~Ox(qu`#E`P7E3T8~Rae-=S#Zj%kop`b1WFhI8e>RfoW$|iI)2}`K2I3B8hytrc{IwB zV&?p;K5!An&+Xwknj4Sw2UpsP&_0{YXvc-t*j;vq->SYRTQW= zO`o~)Y%(@PH8otZI*c95yD^gI_6(S?(zT6p=~oV@V(gGCts8>x`5=gC^~C)74_edk zKE5QE`_l0cM^v7`#PV5C@c0Qa(UtvjP@%UYi?3QZ_>)sXQhJcLQzvFF z%Ft{fcO_XYyoeAS6Ovij_MFJ`n3U;JYZW;m75B6snXccDh;-rw%peowU zdbFU=10d2M<}i%;1oj<^0lN?>Um#|L1dK_fBt>2=N9IV!X1o~nWk0e+%xw(*jt^b@az zj?S&+)Cx04v1W{{>-tD7jy39aK*Un4C|O_hp=_4~To58N(rQX)1Bb#}zk(o}KHg{y zKp81bB+VKWK>iGe$=*uwylD`ao*BSJ1?>wy{CVoto!<5H&Nv_sSXCan}5tjIkZP1t@jF^oHG2efFtkHmH#DmAlaL;0R_RH+75b}q@ zYXswv*9oM{&BD#)_)>(C^G;FH)Kb^Mt)`4o6bd|>~9Cw(1sa!aT0eGwF93TKLIh4=OHQ0xr* zd~B86hr20&38BRlB{;}%`PJM@^ol9X?}Tm*@S`2h_0BTK4m0E7%ffK|9@K1Tscy`{ z>F!HU&B{YT7u|Rbqi*xJxVy3}Uh}|@W5r;r!UZ5*OHY`uL(+*8ZZ&b7LMNswi&5*9_kNg$Yn zGn{g|k=Iz(w$IFPQ0=hsX4%nfGQ3SWHDBGBX{S!`8GG&9fx4{_C63`cKRJgIv$f;F zE0HpGO8Ily(_*v3#+?U>TX%ZOX&D=kXx-#Z2ue;VHe^SI>Ia>Uij=bgIPYPC$~U&; z!G-8}6}bB}ur(4z9?s`Hgx){geGe%98)ICJ$+v;ZDFI|~a}dFDz9YeYVgEBrZsq7i z0rhvD8X?n{8yaclZwCX&vfxzs;0E2j9*=@&pGY5lLsU${SU;8|3#_u!iz#_!9ti?H zqRB0%KkyIC)XmMu_3=?;->x(HrM4XDXHS_jku=a{>u znzM@;>fZ-KiXo5-$s`hUMmT~G70t%C-RFtsjQ&fEU!>a?vLqa!gqjYI&M66AO0F|2 zdwtY&#^W6W61#=08+1&WlOncjlO+ktrHSUnV&!r)4(<)XL%PiY5-iPB0IW}dJ96t? zsOH6X;GA0ZF+I~a&<`+&_{Me!HH`%Ts4t-a?{Mm94L6y^c6v@1#n7270K_fNGJHw3 zJu;KR*dz{qK4X1lC*S7TKqi?*+n*oi-?C5)5pNC3G+@FLZJ>1f8Zp)$(99Zk z-GSQefOtB(Z4i4Z`e4tKcLbo`K(6d@l-hDW4{^Y_arl1Qgt@$qBD@^?)R)#wu0sr; z8@8T_4sZr_onhLUOx_@#9ALG~pjb|Sx_P9c8J64kM9N(N>_374m~?O&NFF(|y zGinUa{BT?FI=JYwyT?C@FQL5Z5W=;*0Q@D=7V&nM+_FY32_iCL{l~ufE2kkbAGDIc z*4dC?mC_G&zG*${s$tm==Jwk)2|DWPDv~9M%0fdOdEIw zwd|d{;lo;#E#OCLK@>6LVu;O91F9jl`TIUNo<~eJ&R>Xs5K%UE-UiuDZ5NMim~{6z zj=3=?(ChlR)qZcZ1H3U=#S(wrc+z7Kx9n>fAo@TBC!+f`_{7PILi><)dWba3Nv8Z1^$A4U`YY(W$~(9*z}z1x^aM zB*6$K8xXtw%a~5ngXNQVW~ORif7FA zio4sF!1IWD9?f(UOC+^o9?{M&L!%quL+_i!cV?|E)aUkk3G4W4OF)7G_VE9L~rNCQ;x*XQ7IeyWPwDF7BM0I&z zvf|(`JP!CqP-35`WpYno7_HdW<*`XdTwb4>1069qlNXL(FVt6=4i9USFL8~|TfIYx zzI(~qy?ijiFKP>^K(2*6uaI216MJ*c>k}An7p4r;K7g?MZ24zwMS>xT(Ox|J|K_*jB&O$HTntu;XyK!3uHB9slqK&zZX>L)pOB zakKk-o^p=pmlBP{UhKv0ZqD~wZoTCq0(FAI==XFL3SpDF{M3_SYn%=X*%xHF zb3*)S{%fvoNf*le2a5pyp;B+`l>UYgllJYlpU%&U$#}I~H<|v!WG%08^IO?MUZG0- zU}(2^D*)x@?+$r*WRM_EQXkAO{RLCxfk8))&p;o|th+-f1R0T2U?Kd{ZhjO+qIo#r z+nbVyP8=<~qnKKkTCoX{0+$vN@r96P`clFvv z`EgL{YILD#X$guy3$0u7$sg?V-Zu@NRbYF)eAPZc2Dsl5?s0CCeX)#+&P=}^N{HZo zkyz5-XAn#koqHHn2cQFlJ-ZVHv2xG2_SL!-2L_KhdifE%g`%dcnqO*d%)vGz1g2H1 z*WvKE>c}$Rh}@tI`&~%y8R5P-hd9^5nZS1sCIJk>WIK=to8La_HRCJbe0a0r77eZ5 zI6-$XNmFd0Pyu*j{Z08SXX}=O*iv+Bvi7U}n8E)1yHEuhNJq>7>ct5JM6S%78;JiE z6VaqxM~c4(7)JN7JB20e34eQi(>>!0NW+xZ!CA{ekN3k;;D)2=Q698d8gGUZ%r3@p zTQ$V7wvSMvJgtErSiKv~`almZ3;8I5wP}Yb$TJAC3n9$BL!Wpgz6+yTI@dM@U?UIV z#S_-zX8uNFh6;|+FWaSV0HzR}O(VK=gXYAZsfGHq0DB3 zIH$2q-aB3_ebzx(*A}fWi6YX5ZSiqxiBux!ZRvw&SW9-XRwA2Y?@D@(%mXNtam?|% zovY9^6p(g(5dATh4E@!=kobDO^~-DRH!^WR=N&yG0K5*K^UyhIz&Ue6cIYbEt|PMg zT6+MF4rmqc?&Eh=&k6{SNj!NM_N)Wg()HjS;U}S_eHO-c6iNtbXY_Qqxn8_Mk(e#I zAz1d^21ehrrTfC>MLkX1gC!aO8bow80($`$1(v4#(SHOmTH@TeTNZKa2$p=2;Rn*% z?HGbU>Tz0wB2|gbr4qHx8C~=A`yB+E_|E!q*@zB6Y|zLp+qa$H9?xhg89QpoD@DSY zQy4U!RDdIW(cBAnMa)n4Yt%eg;LG?PrO%4&-;;gB9#e)r|7w#B8kkVR{lS7eHfl|~ z_F4sU$L>U<31h-#EU07|c{ydnMJ%Ruj)mh5AcPFlXCvc^WxZI!X^hN4rSD)(tDSpN zY*v#knBC(VsZnUC&>4E~?M!i=u`k|NO-`HJ6@;y=!SY^sb09kM!7@Ol>Yj4x``YV% 
zwc!@SNdbrNjLK3NIPS+V`ay9Ozth<$HQ%5Ic-_~p$alsI^x@aek0+|(#FY6Jq@dNy zn6Q|WxQ^mCi*VE(liOYnGM!aO_8q6jo}q(qYlFsyR0Ee&-MHN{eeJyFV)0ds?JC{Z z0aSk9s5~u&9<33Aj3(BZ8kWPArQj9p2H`7j-jF(ST;K+|b;gTWBRr$#u1A~>$EKV* zD7D;^hnk?>tBKM9?#1GRKq8P;0(Rj2HmYBi>&QD(m~RT60&7(cEGiqoZ~J{agke)r zlNc3$;_#E|&2T1Y8+kKyeZblt1-?HFtl4-z^V)*V*ceWnzhEBwKi@76`pUhA%li|a zjh*Wv{P$r>MCRJ`q#YjsGs~AX40O<+!zA!JKtFH4UKo^hArH1v4cjcnR)vRa9?#&D zMwy1q@rbW~9+;DTv{rGKVPKkN67Y9Fap;jPp3QJ~>G-+z+c_I8j3i|w-cWGRy_7+E zovG|oc$SCAk^8TnI)apxZWTl{YyIM14Ey)_{*=3d`)r3V_j{;LMTgIL37-wwqUe^8eo`iBU z{kugN%WLwQYjsn*OXE`-|M5JA)5J1CI=LYJc!K#vy<-;Hrp`bfhW=MLY~yRV=V!0q zAuxkWn{VXOry2ZyNQK3?f?b}I-Y8UGMmS?nurO;fSt3v7@bi!vvf zkbY6240t{lk{D&b?Q<5#q1+(&M#?f_;4A13diyF1P>?`PY9f-Fr^tNXKbYCFHxiIo zYc(3f;>miltr0aMm~VZELv7MJ8a@$R*Jxr4OR7D(J}K5%Y2b`&QzjnV>4QDr5R zfng3EGN@y-a3ny;b8*mKn4s{RwhJ@>$oR?L${H?0fQr-9J&VF|Lf8GXG+V%p;u^GG zM_D98;t(mQ1L|0VzIJ#%5Lk-Y2N#KGu+OnVPW2;;4@YI7!^{f4#UGMVoZxLGlmxeW zcHdV$RH`x2VDio%OI%SZ6&fMv`pkwLlm^GV^P5{c0~+2P6rwrl5tUYm0GXQ5B zQGuNW?Y^Qv`aKVegvDN* zgnFw*Pnok?zFED;O<2BPz-0dc;k7(wysaAfRP}Z5Db1cA<}6 z86j>a$)t>`mV5Rm{({#}_@0J1n~;pxZjXh0lT?!ha^W|e$u7#Fb&Da-Ct@$u0T6TQ<4Fy%Pv6rc8RlOqPQDz|D%2Jmr#lx6 z=zMFCI8hJzVoo0Fc+tU&jS>gk;NPHc=e5dY+wNjrY#Y!x#ez((iFH#P|7ex03~!aK z^RA5$&JseG4M_7BPTxqt@}$?ms!It%Q_Nqgu`Nq%ZV6TP?+^RHm0z85wd2BOPJO7p z8J<=I`ySN0Q|Hg>NzR16X}1gAs87bPlS;j%3nABl{0Un7;vgp{SQ)nwj!CiO3y_uY z#{GO(bbN_G(us2g`1|pW&Eo@V{2AA0)HtMHk8S`ykJOX-$%*d|vs%}i-EM%);Odfk zc4fwpGN`WdfX*tT?I*r-Nb-q%XQwj%5cMmBatrV75Tn|TVxqlXm@8zBW9RvjDiDk_ z;AcO1?RzWgyXKjeAmUw`OT8mR?GFu-HEAiR@s7vfm9_*T|2k^vB)5ObkcVR3;AE7mb#QJv^6#VI?J5{p^oP5F=RaGcir)E z{1LJAwE;|&x?$#qg6DWgFcSUSTkm0o?H?g#SL5iWtb&l^k&v?Eg_SA;%&g%B_ z9}(t+Z74$p*?o5kPj$y)l6vd>4WLcw)ll8=qOEbuZRf1piR?6OxfFT#pIqHULH44Z z95Oq0Z|$p)f<&)fW+s?07d1oX+lj)G9vg-}COTYvQgzJA(?>mT3|0w%`SMVMuaSG3 z)<)D{#O@tuFUU->JUY7MU!zb9?i`jJHlX3fwX^7_Wpw|KvG)pUs(-(DcM>|G2uP7G z2+{;W3B4*ss-P6<0wU5v?+_^gL5k8lhzLmUy+(Q$q<4@WdV&zj;k`I>u73YH^S#PU zCNn#e*?IPQ*7__nt*;{c7VRZxy|UHhVW&ev>#Y&cc2ynWNr%fS4hETT$-qCU;N?6ItZD-sUJkP7O#{^v{U>3eeWw))FuO5Ks@X zJJV|n&%}z>3U$}xSi0??h zO@TG!z7*Z4qS8#CScw{gV-L7K?_2{~_bH_C^2$MI(>$veE(}XQzl4fE1nw(p=>0aJ zH@ZQPVNf}aj9^DkD)7ruO}(6keLNZKcebQF@jK*9`*)tiMuGD;#Q(<@^dVbD!mj?6* zm=XEn{8-}B^LAIYKw3r!hsZHZP#_s`Ny}$NKw2C3x|t%dFX&%#A8gt}tjY@+cXw2k z;Pu08`Pxd$;!?pX*tIO6zM_qR%NYu+iz)Od`D?T9Q%t1chr_vnJp~{ii>S>9_Iri@ zII=#7-(Un*2S8+V9TFG6VZ0y?#^*wQB63|uu4m7(w|}4XCQQ=BSJh97KTDD2-OdqF z)eL)BzLHr*BxHLJSc+@o?qR{L3CI>hH%$%5$7}h)X$L@KYhS<#SY6k- z4c}e~g)b6FUb%G*c)Pj-#Jo27+tUqto0}Gt=$;h zW;V#GY(~iGjN$V1TKjdP4zqYSOO^3p_W*35AkOFCN*IU1=8eVD`LJhaT`NZ5bom1x z&F;q6r?LO@z~6pggE(?;H2ZDnY!|BzvoScHY>rUy=@l1P*QcOv|4KZ@K%6M_ZR8^y zT6dcidDb%$euraKcOh&2R8@xkN?yj~njrc&gBTB)GnSpa+ed5&Z{Eu6sgtSTXAagR za@CY)GGk^wH(%nH-OXyt)*K7vsLDK@I|V?Lc8K7KDVaip%M9cR=2hpL8jitu@8}l? 
zvR_Ajo;F_XPAma&&o`^t$9C56N@6%huoTOOOnBKaqDgfp*FeyR_{G3#?09{1Ddc6( z5vPH6nfe0rNBWLrh?o<_?^X+%xmcTbP|L%ocYlQ0^JSgVxg8Z=lY(O0e2m;ycjQb> z@us!qUrkCdo!L#HNsHh_xWsIPZO`#*fz~mSLSYALf?fZ*+ZYUv^u!~THBRBx8t$OWCtp6|0 zZ13|5+mMmRV>Z;&B)yPL^O6OR9!-}QelR!fE^nmo0>MU4rtY?V8JORf$3Yn{_*fq6sGrZs>^Hx0njQr^Gmeg0h|2kGE zmbpZ%#aFI8Rymh~+nd-7(aV<#vuR|9aM9s)BO+VwL$>e}Fk;Vuu6@IY_E4pJP`G`Jcx%@efGk*~7Eq*UN#tbVo&LK!*h*BNViV#1wm>4~ zqwV~y&?5f2aLbF%Ee*Q1+p(mN3X!5e@g&2am%>NLT`|l$GY_&UK8xn8&S2-jkL53W zdjocyjKoGu>4V?7;n!D{Gr^TuKM!X75gWq)HdwUb<+f7>gk%O?Gaefv(;E7zP#W!M zGEiMU_E$4p_M$BnxdJ#!tP9MhobLxS3D3T zwS||F^k*F9Pw<0kLAA+&haX&M^uX#G;9pOiW^u(1Qz$s%*S^2Si?_;zTPo$10=}za zvuMNlCS$KG&N54#VVwq!WhAqfjjR{AU7$RF;bKInADAC}%0=oe<1z?@r>eB7JPqU% zKN#+i!GAYsvI!v^AL_d0D;;#il;bji$}>tGz4oD{u;>ta=G%sxv@j;TbbhcdlGz?G zcyKaC$ZGYdv8?ZUR*qhw=iI?1w-4^>2X>sBQG|(vZJf^my2VKB&vFe+Wo8)F;z#kR z@cyMM(4d?fx9+h3Fx;@$VWk?0n0p{(;-Fi97~70wI<9_~T@o z4J=Ai9X{px6-!Zl-5zRzUo}EjHgDbAUY}#X3ZD;i-7NG8sp1FKcpi)`UNwUew=U_C zp8c7-Tyep!hua&qb~Ic>rxlI@wHZL&C%{qg$UL0=0sj28YCBo2<%L6eY2Ildr)=7q zUOW1>s*d!(7304h{kD6GlGuYKS>%+=z2%J?eX#W{eueC0V`#0;hnwFU7UdA5NWOm-7l08TfMmhedhqVuw<_)^d#ma4n=P;aXUjQ{ifX?+eoy0sPJ#2$B zRhpHSN!k7Wp%6K3{2xpp<1j)1@Sm|6_>yYef zHO$S5!Q5UseQCp75(-Dr;y@pWWzqvDR}g7ub^}2Kf%nztoQT<{OGVhpEXv# zVDd<7p%)vuJ*MT2N+?J9kgfN!_+e+Eh};tA%d{y?!t=L#9<&}$gP-1qXo#vmkjpCW zf%S4@*IWKr4Av_N%@3;HW(|O6k8Ls$bB|ok8NU6#^wV$uH$LW&qUoz;!Qn(rz2`?tRUK zoD*#y{sI{D5YB&_2?OdPOUsDzGk7@aET9hb^1tq*N}xg0Hu&|0<8P2gci@bXCS2_r z1ToL`isTyEF5TL57>XTG3aL(m^L|LW1)|u~6m?d&zIUGt2e zA>7vK(iNx+oxXO%LKqxNLyZFPvI&+GkJE_j`{2GT9(lee0?*rr4kILi`NjKF7AEZ_ zk|%a5Guxjlh|S@$RxRn}6|qF$ZOA%5IwuNZXL8%)VE@XY(R3z~$o# z*3ksXxOV6%lJNWH%jKS;vJ9LPH6#=xJYU+dk@~MqyDi{qS_`BR8LTe4?D?_rmWCI? zqx&Ovz)26vD(tBlMTW8XJPdLZUUY2S{&9#Y3v8(^ zdj(s_`u}}*bb-!%)MUy~WrR6?=7%vqyESw`H|SM0vtAeJb?s(IxRXeHlB#uDx=v1) z=}Y+$=Lgc%Q;cH3s!e{>grS5*JIy{ktjG~-CtP-;2@#u&{i>>pts?LFscD09Df!qp zitum`I3NVZ3MGL}ufX)EAYX}q$m@#fkje>xys4HSmS)|zv6;kGniBmspq{xs$6f`s z4=_l3x!db|B`gMh$2yElY`+2X;`I+!aZRtP35{UW7a(o&_5GDV9nlMGctucO1g=YOW(F)E+rOU_nBp~z>9nqF@QR-5`vw`nZW9z>LPC#$E)NNEpvag}3XEZK zeqbofL1e*+@nu`H^ws?Sbm9t{*P`yJMF6kS*BIhu1vcA9h^c4Cq))R)G(+lkKRdV~ zqg+W&o^Hgu7_i~z`bPQ)KBglDebWD;`~o?XAhbWo%zffObw0&%Zt|L5QKtSN7FD7p zo2)9Navv2{k=#^nVnqK$e@8tXOzWm!ILhQCg&nFAp6&Tm0K4BJHT-mFC zv^PBvo=;W-Im!n={<1=_tY_8UH}96wMxK2l#*cD2b=weMuL{EOq$w#aq1)ydYQghR zwh&Xic_7tNMb+M~f1o+Dn4RJQf)NT+e8EN7N`9knK;3rnx3rutje)}4op%a@NGSXP$FxT~nT<;ZZL(D@nheejRzIATSg^QVT_!kNCO!sh2mIBNXE-IrJvXZH1C#vR1JT`>sKG- z)BhHS{P#qvz_-Qci$9K*!nZiIL@yPkflk2V{!M8G6+ zuqH}xyJUHlp`_BsAO&YvbRwW&+EdJjaoju)IdB`Qubm&jpLc{zdoo!9Huy(Y^)=?7 zwPZ=Y=Py4+0ZE?ECeyOU2kdA;bxjAYl3+j6 zqJjb<*gRaZ{??!T&0e*Y)AHq|U+=yDeZ)RKi3QbBzZY&LdihmXRd4RLsIc=k*c-H* z%9bvu=CZMccdWXfDeVhxeG)yK^Lw}AxTkrWO=K^A<=EawcJJQ3&DUO)#t03YV)mX@ z+hJC%G2(&~t^8C?>zE4Wbf2Kt9-O`khnz?zl=y9eFxK4{H0AaRr{iJVJl4mT%_+B9 z#1vLiwkFW`g{OCw&%e$=R(XR}MxFU`BdYOMd1YlSBuR=5+#{ivL-yH|g;x_2dXlZR zF>c-*IL;2aJ85fbH)I`GHo(j(9bC|lRK@? 
zRaD(`I3^F%B=lnPgys!i)VyD;mvm-ge56^M#r~v>E#xhjh^6O~zu;aC28fUZDYdyc zSpOqjcM8><$m)7L?>|*~=oFH<2c>l8Ev^tDlJ?jr{JRc^cwH=%jzUM(vpZeaKp*U7 z7qVvI`}qu16y3kT$8QB7y|%$?O?2m9-V!OaCl&$_3L)iPXM!nv33`T?bSC3sP=c+; z^I6f}bN&&r(QSE4yp)-uyD2$IWY)>>c`G9 z0Nu&B6|VO#4AE{3FKAu68M@thbC~m`&8U%UAvgVXe3mK=!vLaGd}@$72J|F#JX#9A z2C~xXG7OMne}LxO?y+2X5b0osXCvFrg2#;i4F|}Hvx4h4y6yT?%j76#RlqZ55TV92 z@AywMjjvna_ijx#!cDXcj(qwDk>ah3Z1(F#BYW&II>CqNgQuTvDG7?E5QBd_h@G(F zF$;vXWr`Vt1#&(P#I^j6ye?<`9JhJXWO%z;avb~EhR+74V%s)pD~`yaFT2ruPTzIa zFB9w$cxmPt&f1mk26p5I6*saB#r@+wo_~!uUwZWuVU#;s#kc%W_|~ladr|L=;7&pv z%ph0xdL}wK*M-L6UaI!o-it?a9ni=|*FdV`)Q$puSN;f$Mei@e*&>=bM5p$$BiGXbjT`TjI^pJOBPm5Z29JH=y|0nXu zTWJ1*H7(Q?B)AZWA5I4%Z|S@6{y|jVpgHp3`}bONFJ)G-!;9~9=5`fMms>au4x=&3 zLMI?o0JP`yVquraSCU-s9bt0Y{=s!=uR`Xro`#=Tr*_DGUs)Bc4diPJA=}J7^UFym zT7?(1$?tI}ll4}S9wW8Mm$IFY`e`sLP*%#6&>^Xt-hJ)o*8{BAo9|WzkD@yLu>C2$ zXTP(&+5yDYhjn_2+$JI?+3>j<_rw%b1m3mR#EfU<0f~e(c;bW z^n;6jEVWh-S5x-dhOFaXx!ck*NC z*>KWK9V{E?ls%$06Nv9?y|+M)^Ss}-hhd1z-U5F3^SpN>f_F0>JYujda=TO75D*%MOz%N_X%m{hjj!KDw_{WHEdgO?FyQ;O8t}7 zA4nANuFrlwxS8nEcpf-|u3KWcZ37tohlrz)FmeC$dL@Fdv;aMFGoq9AV0GjF9-PDf zlNEy0f6?(M`Mh|uIXqiqhq}P*5<6oo@Rq+q3RUB{%8OE|sY-pV>K>d^(I>SA{(dZ8 z6a9uFpFS|F@fqFBN`Ut1Xb9=@S{6a=uPS5HmTxpqi|ztM=j-ixTQoy>5gU&`vB88M>%Z&-2Zif)`!f* z5b`EUz1i2mGYJo}EEt(`je4TE*wiK{NJFem$~e%{8k+Nvo`=#dpn%(=e}RzgadNB; z-7;sA^&g1&I!|Zmg0u4L0>s_%sz5_A#^qf<7f{W5(`V?fsOOBrXW9H6;sB&fMwi+Y zY5T{Ljhz~F7kch_QE;3*qTFy#8lPfddpj0@#F;HoUmwb`f4Aaenzy;LGo0>` z(QA|^Gr78K8j|ZPOn$LR{Ar3H?6gqGfuPgpvDIx$HlkAA^zTL1s4_L8{AKwT)E`DG zr<~tV%)ZH2${6!<6}7ZeD(U(b4N49m1>_R{lAkU%nn)iHCOprtN2IaEi?YM#n;CM* z#PxwF^ZLhOU?pU{{i9{13{w(Y9<~KayG+xWs)-+;9I@{U@-c}q#ZeGkb~@kCp4{~K z76Do0&j@nYQs9o3u~(FfT=vUP3{=ZxhW*Q*Ib8(A30XZ0ei&(Y8{h;=Fx8WId_OuxTbOYMr1jExfb)~jk3PDlXkb#9I=f$wJzQhiTh*X{ zB}x33#}&O1Xiug24o%uW1bW!GtKZaloPbks0#m+40_**?X|zMs4Sp7<|*}!doKG({SC3(@SJP@Me{A4FtNRBsQ9LlHIOW zJY|vpH2j=c3)H>~r4-18%eZKIzxAB1-;t}|Hv$kzPovl3Ot8y}e?quQKH-#{x;^il zZ*<>}@!mHMW36k=7=KUqd@X!X^t3t3J5zCb+H*@E@%mk^I0kv-n`_djzsJpKLw3^Q z$VefdT{Q-_DA!)ls_DWX?D5^I7Z_~v0D6%q>G1u4i#88r7-(K8n&W^2m)a)F-LaRz zzNyGKFjj)yl_DKXi*b7Bo7#4Vnz)CUo&Wlg@#p<+RQeO+65k#x7vz0jWA|W^PUb}g~~C!b|{anfrEVYEf=lF*#6VXJL#LGuYb6j&_l2# zs8idO2lRpXhlv9EuSP#oG1lehH;FIt?FW*w)0Wew@XrITPqM<6uCaBG2hR>yP!$8N zo%POZtM0o*wbtz#q(lEZSn~hHN)*0R8Q)SeJ^dI+`t*5`B41ays)nZWc_ggdCm6_i z6Ia;-CW(g#LCstLPQG*ROdOqB5ys z>5+MHQRsT3yff8edRZDl&L!=vdpRubXZKRJQqk?njzJa#?t%WBuyRqvdtao>dF3GT zwrcXL?3RAW6T4o|O486c_#_3BZZCE99U)7MRHxJM0-aL(v4lG~+73~(QqGE}$`V@0 z71w^4Ne%yC_ROnWnvG17IO`u(JL*J;SgBJ=Vn9kyOq=l3LB+IgVTEDZDY7@XdcDNo z;_!WXo>;zbC+F^1sMO04@+W@TYq9PH#m+2Z-8xKxN7|!LNtOr{6}hjo_UD+d57U0a z0c!i%JKB(xRwxd|^a`mVl0N+c;5|{02ANOic``(AE3H^D%_8+goE8+AuqVWUSn}w_ zs=gIBfwKX`oeAIL6$KnfLA6N+gTez^_=qu+cL23_(b>qZV8k33J#7%%2)I_-&E_?| z&j%D7fAB}g)=T(%nf8sMa-t=!y&3Svj^8X!bOk24WuY&d8$p3#om=2^ z9mag?)0!70`Grq8!=JMk$7BNCprtz={ggljdq^W=r_@kL-;ofs4vnm~XXzA2xJ`kZ zu}VtHw;`{UO0o(2iJSZ~P2qMeY>4(JiNoLRB%ZQLl zgD*37!|C@51|F3zTLk-9Jjfn=Wj{yx$=5KJ!$2_Cf}gm!KRJq>1~ktFwx6(c>@iQv zw=m1KxZ~a}lVOkU5sx`xx%=joMM($i@4uk<3-J>8dL4S)!D1e`+a_BHcmx!;&E^ z=WLZlUw`;pGOIY7Crw6OwKDO)uFF`2k?{OGDK(YXj?Jr#z-vUNiTQwDoU3;C4AceF zLiTTl5@UQ_^TNP9io>pq1pE6t%p+_Yh&t%pFR}_~1GB+WwWD|74}6y(z)#m{n^cju z63|yk)Px-gQ41-qqS{Kv*HVAY;AVDyw|wx&c%nG>P}fH56QXy9p4bPh_q`a*wUo>; zN|RLWgXVjEnA)ma@WZpky63=?*MK+5AW#wXQ^}lhHk|FssQ4&&@U^Z%J_OV$QFZVN z#XpaC;_wJvN~paPj>ZyBKaKfjehvvy^CL67cPKI!^i(t)CxLzZWc|7vwDIQ!3v!w) zcb|sEi@vX(Y;5{7#)VLQn$Cb#V;W*zLn4a(MQ-exR%13@J}@(P@wiO7A^|J(4j*A7 zjD3(UnA;{=oI^Z)Ww;5uzix{f4ZRxrwn1Js;#CW0FL8$50uSU%|4P9R`F%I+ic!4n 
zRab+SMi+4pRBGr+yjd;9RI-MACnz$!!&j)fo%Y2(u16iP62%+>p=W2}p+bGl####! zqC~R6&c3p~Ms}0M-kW|Z*|Kt(7SHglxpSKgrFus}X?N`ty0{?2OdiNhldjBK2J--Z zxntW?p50OyZpI#aowCQQn^{Wi3JTHcCPyAI{y=*|4)=N%ZRz`GkCaMi?0+^k-b>l1 zh>^z9BUnzxiVX+XJkI|nDFf=|4x5?^GmZ=QSo}|4Dqv(7P}+`*&El1_5}y0%PwvT! zgX)lE*5-NuV8)1g^+P-E`sRV!FlA!fHKq*pAjz1@ESP{SC`U{Po5YgXk1#5Ea}xHy zvM9wLsWtygqpbW`kEB!Kj@HujJ+)TVddVu7Tk^^$jjDa@OEE>!s7nrVSX|oe4kcee zw7yQB0ehzSWyvt+;UlNa=q!pdllQV%-Dd60Yl%M;#3>i~0WUA#EW|Iyr7!vjTwRXI zCr-+AV!laX?_gdPv*6Z^-KN(j((B%vLD}E!=bTwgrW4A!fYkYMGzB6DY0(%PU^uh$ zsMVcp*f+h~gb?CKM!m-)cB3E1wwzSmAE{jiyxrd0v%zFwC zejO7l9iq$2b?d6Uvf$YRm~TG|S>3bZV9J@|S_J@`E9seG!FAHnoBLuva40rLxnxo1 zrVL4-o+sb(FFoef&&2P+ZqX5p@cnzydo^?x1L1|`}-|42~6+H48huTQd(};6Byaz+L?<@2;Q3M|@)g*x6bj%ze$qUrXlJ2a&TGGASd9SUY(?R)UI z&K=sDgA0l%?DKPsMeNPEtwdxK8jJocu_w^JN~0-|INu8%Uwjkvh~U0zp|D^~h0*%n8;#pTa4leaA_}W#V>@m!rV7s8iTS%E#Qb6J(u$+y?YfCMY zhDCBl%F+2qfik&C^eyXF;$Y}0=HQ;MD_1tVOQ}hCNe5zgg4LWsGJfSX%Ab+emF3+V zwe{U@miJ!c9(zf;)2y4o3&)R9wK0Bf3o}V`ki&+F57Q{U3c5Y>CEelH>6;Yb1@?>y zH~WPCjb!f(mk6BF*Hw`I^G57ED&R@nk$?#hi+W=hQR-7km`3O$t0Gp zD%L+H^`5=nN~PZ_z_~w0=)k$HmTJoCM@dI_VntcYV8A);E)SqHx%L{#WF3P1j4>R* zjjBSTTMYTGX7q=xE>RU`C>2M!#M^nCtF2n{)RGBeAQI%<@?~3VFP|E;X9q|PwasR= z)Uak7(j~)%^UNUgdI!PCBLIBioLN({y>%)uIq=rrlFW5B%>(!Q;Qiz(8-4mg$sAoG z1fkgx@D8+9YjZ96Sa2g6V@))TC)M+~J9jgp3c-$sd!pNB=ejR>&r1&@DM&yqpZ|46ow%_K#pZ)%icJzJbA8GG5 z2)DTN-r`8!-x2ry?;GS!HKV9Ietvl-tY@1eO)-MiS6_!uOH%Yz*pxRL@&cV0-jRK{ zTHBeK`uP;s$(vrx=lr=eNcztY5sRkAKNJ~&0N$fSFj#=|OpA}H0S%xoPC2I-wf+tG z%z|_68OVJV_qMwpK|;Qw^;l!SlkFAOA~=|xCp-Wq;a5owSsv|H!d7d|b)YWcHGFcK`T*?2u#gdyMgf226pto| zt@aZrCU2g;3+N!>G_ml#g@Pqo>@&OcF-B)o(qA*zZ%(ao(RHNc^QK$hbIr*SEHZyk z5uY_?ST;hbwC!Sojg$kanK~<@@wo8;*x0p_)ea)lckLI<)5!J?(LAC#D4Ep4Jl?ql z-yx++U8#p~zRWc(CzJ#z{~`1?)pfqx-|mZ{@K4Jm;@`}*VSts*#<=(xIh4nSrRMJ7 z-}@KSpQ>=?5IJC@9I`eW1M!zsr};>{Sw-FWnBV#VB3(k#G_aK1n?a_Gx^rLNWm;SY z$E%omRgXP_g+ZDG}wfkqcqkKmSUtiLjGBDf$by zyV0(w8S%iT?J9`yK<%rxjgsF4li92XtnM$ot$q-vYY@gKUnWEAE>`T172T2kx>*hJ zeN&W&FE+HbE~r&R=kxgGK{YkNQzM`rpNG-a)Z4@}@vI&zvphnJAVTN!3^qFggy0s3 zipNpchlVkA%$Ol>T_-iYJJIov{p8x74jMtaxgyIXkHveC+fN2fZ@;_6dhbBvNaS0G z&PM8I;o>##_063b*w33MQ_eledhwn>x3hGu8pO-e%Ad|;6wL(wtmP2#Rr0+!$Tq1+ zoXFx|a4#NN7_2$Jn!Q`b7lTVxpQte|16@X(RgpvgY&AcT*Xv0AvwrNl(BLthcR#rZ z{o!Vza87=~e0))G&4n4)Yma@9;;#Yl>rBce^)`iBGxuvjf@Cp2S|;aK$Mze9?L< zVh;kNQRxx^3?bapigw!R?J6s|TSLk2a}Is~IVX>pd}6L%pIVgTWv*6W8*I zS_`c!fW`OhBU$%+RZ(cyE!t|z2a=3gKFi~{NIb6%YB5NxI-jY{UUQlB64^6Crb$d} zpr`yi_nFBmVqWY(H!s*`BfJeFaaDG)X1`}|KH>H2l^Ux$w&vZ@ruJ*v{*l%h1zV(_ zaSZxE{P4C>!_=VMyhHgqtn&Fy43+)Kdzrf1#as8KBX@?I4*Drd`?fU&rk){@z!X%~ z32Qqa`p;4!ExcW!Se#O(C$RvR^CHa+*c_Y^6rngOf z{P3jUp3M<0Q{;yKrzd#n@CH8#vSh*j!%tw|jh|z8_$2eTg882OTy%c4Pw;QF0$oH* z-uI*Y`)nPpdI5zUH2>+Q``aBR$F8T$bx5`9>6jHkHdQj7pj=AS^DpJORzv?sfQ>d-eX9^rr(vjB=tX zlRSV_;}@(W-vpu7&-d?6iVJ>V;nVv7J5ddbNg4PVqK>k^aFeNzWS&PCNtqoj-!9Zz zzciXt+1ETZ9DYGEt>xZH(VNL6 z83j%tKI`eo^dK4G!bPxO4Fje`U@sJu*Bq1}If{E{UknnpqHo-zs6MV^=N$=%KlVsMes-0C0}cj2)DEhFsg3hCk`vtbZ7jp)T>s zS%0oCkg^*l=%0JPI?TTsUo^ud0mjA2vMVPf`nZQAhnbkJN6LsBw4ec!{*z@*+8%Md zZ#ncl!xAza3`E%Nj?>8BpK9ot4fsQN@jDvdgxM7juGXksH;c3v=@6LF6DE!`*_Wr7 z&2=G}e9Bw8qQzmkEp5ACTQ*y{+?4TNd1ErtHQe8@`gzX-35K$%VP{Z5VT)Nd5_+J}dL>*U(SE-VYcX6Q} z9JnfRhx%*dDPi-vj>+R3NQdg?f`7;An>P^ZfBtfsC(qtgvwl3120BX1Xa^Hqn#_pb z9#{l;fG>;Rm7T)I4u9NEXYK#ewHU0xxs#sJ)xKxsQ-3@<gZ?so&Jlg$|7OIw1sx<2*~ zx(!Ke6!^FHTA>FILI-6WgwTOI<_~ZgD?AyHz(A^79^d(Lj}Nv8ZFClR0!lg?CYLo7)BzAQ=?mhZVQ2fS62sU?XF!=~`t$NU%YHk9a z`F*3!VqJg!AM0jti7Rq=G@-2Z;zP2q^$V-sK4jDBZ#53c{`jS!hkaG5oi&-&|2%#y z_j(a?n>l~lIz`n!*6AF@%=87Ifk+@QBM(%>2R#M@$_X8Hq`7-Kw%=tKSnohMz36m6>jIXq} 
zFF(cP=S~X2CDQOS@D}bTqcF*K(HD!(u9J^CUNNk)MU1hj(bRLws zoC7|*6S5PCC3gwBQaZ;1bm4dA&A!LI!)oL{J=vuRBEyi~UY++?Z; zv{v|PbpW0{6g;m~ZEjN4i8|BudQT*x+jFF6L+xUD!lJA34&v=_8X#0{4=L#0L?ft! zOlRMl83g51wd#g7T7KGDFL0eNFW7a?Oe<2SO>Fpy$M%DgN)w(<5c>za%We~2#P;&G zF|zP%ece+^n6w7gB;I~+hLrw(m9-Q*&#ono)M7}T!);3~CKg6Qh?Bj}=gN7S9qB!y zhx$B&M$H@oYqPD#@;{oU^GVJ$JM&li)K$2 z?q?BKL;>%wypuLbYSX$YjreMr%c@F?<6N9lVM8A1fo~cAq=!}PD@QYNF2<4jL&egV z+-9V}>%b%5qjY}>r~d8PV;_jUM5%?=G2K+K@*|h z9WRMCw9V)yHLbJQVftG1>Lyc`#>4Ofr|wAjzxY=UP+UV;wK#B(`Aad^Xczwl5$ zw6e&jRWx6Q+&V=R=wXh=FJ)Ng*}gwsHZ%@<`A*r@>nPQ}=2V#f@9oO>zBYF<%=Y(p zq1(NH(5>;2XbZDF&Mm4vn$-s5eVp{awVzw{$M8S(=ZnoIX=2F)v1fC1hK^0baCHqM zkt=?cIn{aM56u!{jF*ntXvy*(E{G5d8_kF|>iF`C3h}kIhvT{h>-K1X-JLtN zX6-Th#?)KDKX%TPF&V1owz%Ww!QV)o#E@ID_3g1OkLIV zpvZmk_j9zEbALBB=Dy!vjE94ojsKV|qx-4=armoNuNF;7Y)bnhYgJvlhZ4-cYs-S0 zUy0%>GbKL7b**q*JdB_!I`o(pJ2#EZ&xVB7YFvN7B(&xqO++{Z(^`ioBs0p?1yi&hS}0BD=qr(NF{pe5l)p!@IVc?d$#`z_IqzXp>-t=J;`6tlx3hPI=Iw z=b!RyJUE4r0eZ;WRc37(MCJBNGJmK0#5wc5S<6Txm_s%Uup%8GGMPu9e$eOJm>SSx^dno^lBBYXNFP- z7l-s88HRh*x^_d~WUVfE_#bi24SK&UeSg22c`9c&_?tAhKl8SapICOm$V1qz`!_xC zEFCYr%VSiRcT&USTfkTH)q9F8=#knxq_|D_pb3_{f9mWw`@7uT<4$+a71wxrbI9hR z9VE4mOtrLE4Zb|+dvux5eIS1J68Gusq3mXJ<^{E}j;1Ev2& zeQcNXGyl66Qcq-dePz(j{eH#IGnD+q&4w%6A8f9AVM^M?4Y1#_g=4f$82-FqIsXK?R->cB}mYzMpEH&kXPrHDHGc=BwD ztmOh!eTVDxl}jg?O3VY6GWi=fhSGY<6Y_?b4sNXT&NfE=uNl|u^X-g_?b0PE~s zI35{N827+*GP!sP+z(tNaZ&VFDUf9)5hL2VGsL-*J*qf1`aP>rg=BiRKqYwBCp;-yJ8xg0=Xl{ANlPW^uZ>#oNREt7Qz%X+ zt!64R-(ff=Y$9%-&%@1_(9>tX%%KUZsLE$7c$mmh^37SMj(5G@zalh8KXXww ze(8Bjb`R=6?L;p&%BdprDe2o^=f@(@H=w#WPwpp34U-+vrG{=Yzkc$5DLPs@Ghc2oiqT@m12W0=h_fkMwvYC{rt=NVNg znX<{_?^d5~j>>Gq178}F4x<$BZv_v&C#U#MHS58P z&gsN68Z%S1q_-`NPNt_UcyUqpU3>jrR?ZCC)R~VhmizU!miug9Yb$wm*P09{3`}nP zDl%idr`a9L|0{DI_7;vx&KhVoQq$WbjkD-14Sk>w5#sjkibV*&jZpmjyP)b#io9ZvnfMRm{GSzWFpd$op(Ey=7%V+_&?%?hLsveJ>=kw2;W5Pq5BivY~~@w`okq zxpV3y2bdPNb)>+oWebE3@AXrSu=uP&&y4c^MI=O@pbw7{PVFQ^_j5JBLJQA1yDdCv zc8$0*v@pAiS;MRc6}4tE-$%lH_=sXJE~=+EZ2Oe<*2)7Se$zIJs~;4W#Z2Vy_tfgU zn3&Jt>@cO!RvD$}hcEXobI+a|HhWa>DsLYoA4F!*?Fsf9Ub6JaW!XkO!||I+PAbx* z)lKMGQ1Y(555>INd-$`&;A!oN>RNRlmG|0|m*o*SxX8q^+P6NS*kG&JqdAkOgitb? z2$kb|P&pb*NHDlIDHs~hzsP)0Bl!Y6&lFQ!{XCNPxn3$FWeRVO07|cIG~!EDu(A33 z?UkFu+5W=gSAUzmPdYn8NfmG79;-#S?_S5+z4S3cONV6)npG0~@1vB_e3(~CR1iQlMC%Eu>g-+$&!*`=wlM!U56f9DO8 zfmco&*rq9$?1owj=&@GbL(-`#7s?C;K!$dVbA8`2*~=_@04L zg;(vgjqQ{vPl?{y8wcM!x@fM0m$PLKx$BCySz*N5I{8@33KQnv6489XK?+ff}H!`p>=kC~Ci| zks$`*pFMPUPD>LaNuS^O@LWPYpe&!Hx{0cL!Kez0d|GrC$~;FA@@{B1Y(J!Z?@tl` zhQo(38ZIcC2|w~&Eh+&UH_KF@37P_yV0A1ZXYM+CPnYvGU%N1;($t>e*ROjBm*|3lb! zhqLwneJ8Q0RjVa7wTiYBu}f>#s=cYI8i|P39-&lQVpLJJtM=YO5wvEF7O_Goq4wVE z$@iJ}b>H{z_xyFvAGxlaoOAN|yx*_)YmBvYTaVN|mC{rkZK>E)(rgZ?-7895XtbMh z*$A%VeiEGpzb*4*I+O#)Rpe4%n+|jyco>YmNfiE<<;F>gKX~zahmR_q!8i^Pwk@J)XR(t%{g0@Sisp#K378)NhGHW5p_1pe+fw!IdUj6Ik zrJ7t>;hzPjX2cn`BFrxLW#L5l)!xd&xN1>IT(_6sxl-_suUx%cD=FRA1s7o*L~r-4 zX!{TPW6Su@`YP&UoK;0r5V4l`T;fJqk#XF=+<*J;?7AS}@=ncpi8kpY0{zRsDmn^$ zMs}j>^L|&2-wb(JHgc@O^e797Vg|%W?~;CTeM`3F++-)OBC5ggXy&2d53eL_O4Z5_ z3tqbc&(Icj;h%d2i7LQnS!90?E$a_fG!nP^{@v?T`j5t9NGGu|qcp4yN&cY*UAv~z z{mx=)@ZI+0&R8rXcZ1W}ZQGe&oLND;f+YIEU6;!Jbh@X<)@PqynjL+*b-TFg*`>0l zf_PG{Pp0t|m>;$a(GR~6y19M}CCjPOT~6~=`K%!E$;~g|AF;V$ZSJgsJE39qOh}^! 
z_x6OBWt(?_LRZ{@Tl3E5uky|Y`-~tiNr>e5i0TLiK8|bT;L7$1xr4gqt&MTdn4`-u z1-pjk>4ps%IR}ok7l>F8>2+-&)kp6^h)L|$S(&$)Yf0e~H3$9yrI*Ao$(BQEyw%z0 zjWu(8x*k4FjICd~3?JHxW_Ez46Ma5gR~ylzN57gt-qf>v%F-X=z8xF6O_`f;tOdtaP|LL3m8BB-2Cdz$!&|e{UyJ?z1aPyj_%rqMUk@keT%Y?!QT7* zl0B6&W%tUUYmfW;;O$^P4e1UnfRZ7XfOLDIs& z>>%}-E$!F0`t`z^foXTWvwpJzy~B`}8V}X1^*f`_8i%~smzy5U)*4i`rMoBsUXyW~ zPAhDgTc-@BV%zMK4>nSv!2!=)NiIn9<p9 z)8r_Ho;_aseShT5wVo7o28-})RmosItb`U&T7or>+qAT5 zn*{m@W~DKeoZt?0u;!*S4*Ow;DHY{a8wzeDhJz#Wn`O%dS*`r!^gZ>1oKj>J#*qHO zC5ZfyU44*C^_6&v+X?k>3<-eTW4gvAju`4CtSakZm8Be9N;FcLC;B(gO;kJgdt96z z)Q4V$loSzTeB_D#h!#54;ZYxHEMNzuKo4=BU>=>)3vafCbApdL$%Apw1VgV z;^zLZJ3hDMo^2HNRUISCz~Ow-e2U3`8O)AOciblXlRvO(MSdgp|9v577J8(g13wLA2niG7-14lxIB;Z*uE zK2P4C+*zg?pwFJ-UQ@8#Yb)e#cvJw`&ulm(5im&!lk;ECUV=f#0}cp;nu!Q&LPb={f2rrcC^IltQ0U`sH0I_6JqvZ zV+RsT@psUhX$pVqhU7$prF+m(Yt!6IcZMFAdHhY}^s%nD2L=On$IFaR6L?UXx(oCh zg}g7dT3(nAovoZ!6c@D^wROGGw^OLH>%t(9x#kr%O)lP<=7C!g1z2pKsRb9{<{ers zY`z}H8WG??x}tGJg)+>#-h-+Vx`r*wd2DH!Y7+&)78whr-Ej;jdZYCP9IoF?hhZ9t z9`e&~XgL;%32nUUZ!GYw!Rjq?f57M;mfODn`k!V4q=2=Ohl9L2@AWZ;SqH@}UtD++ z4XJ)>&mH1QSPp;oCt)iDrpbG&Kr!sf#0&}eyDH0pH*(esV2>d0SaWYp+JJ}L*EP?=~c{N!MfERrE}w-<{>fx zFZ4wP?E8iheO2GRx_rA>x-BGmms_o!hgzTH4D3DT7?!l27a&}dxASa1#7@V~ypr+O z$qZg7iNxv~@^Rs~7e_(|fMIg6$NJM%wySe?gFp6c9@`ltQ)M+ipm2UVIZA&D2^LSM zCwp>$$ZWmu`#ofbOT}2?@ZHg3f`{x!l}?`zuhKV`CmLQFqZ*>hSK6Mjy^dW8@7;+| zlOJl7tG8hMk?5R;O&R4$j^3);*_JveDfbKYSjNoDTQ?M8S^F|hmTFARO6p`bcWpRS zM8bgHad1Uv*e;jh*IgoN${=mn*&EB`pCBu6|QZ4^ggc zwaa-T<(-#g)#BQBOhV*ZSYkw>0%Gy)K!$=j z&r4HEsooY<`N0aJ-=ExqMjRJs_H?ObO-Jx(qx-l-Akn^rq;QR>`+;MC1o(g;{Z(+P zN$0=W=HNRbbC``kX?xE#({Z(rS+{Awe`OCn8}hKI@v9+A9_S8oxW_;%ZzuI zy0vU+4{4`t*5b8s@!Crdlmn3L*vS1_Y_kYOIHmLLwBGtb!pt8uGkbARuPN~vmQhpM zFg4UgxE0Uv;Ni&G=d)Cq6^^Jw3<2R@c>-ef{8?5jF;nd^%oEwqY$5RMdA*trhzs%o z`u(g^?)pKVPX5(6ZQTkEI@D_67#dLd^9_}}O`vHjVL5I0KI8j}+4>&Nm%UmL-1Egq z24pSM=a`MvhaQrxBWj}(t==f>j$`3$e#^|Pg;&bn!)CXc@J`b!OX`hXS1<3?YG%px z2A-!&FrP7Py;F$_iA?)_^i@SRd4c4h+gJH})5um;95aNQBG_%+zNw=N_M?8`WIu9I zY5vI{Meel2yC0W>nlg`BU+SiNtZOpNeW%?C()l!cGC?=&epYT3=xJA{bdo|XHZFC~ z%corL^9Oh1u4T(32_d94a%AQ3^WojYqCDRw=CcPZ-OPL}hS%U{&mN%wPt+-SwP))Q zVWnjp?$90FDmrJe4H13suFhGaVFud8YF8Vn65c!A?5Kc(11BU=KFj_bwaQ zIrs6$xPWa)x`Dkx<}y2g-a_l@h}&dPA!)71g==^QJi5dcdmI=&92XIZYxb=Wx2hE; zB7Qo5U}=i})dxhon?*;G^nolD6*R8NcZ*By@rM#xHjZUk=oIrHEw?g`C`{8&lYv?l zrmX2E-=;E*#d)5be)*&v+=RMumv1q6VcG9^>t|s^+rKWW{_p$N#aLtM-!#yU&hc+T zpps6bhV;U^P@WrRDIzMyuKur;#3?42hEk?cm*0Z>m?*=xP zVxL_M%naR9boP5B{NS&=+T9OAs|zf3)a{>~Y6=LJTLtwt@roK`JmfuR9ux*_zC>OM^yt$S{!x%)j%Yi*j>%K0Ks zN66b8cqKT>8(foIGvuF>73h6QH3*K`xFsntO*6uHshg}78q4&1F`!h+lHONZB)RzO zWMYx-DAU^9yS-$s zWilUsYp`h5Zh2?Tt^$g-RNv}#_Q}Gs4fV=3_THjK=-o_lQEKWl%Ku$e+{FPI$Mrr7 z`|h&()@iB!BEZh9w5%#P;Q8?&EY~H!V7IHz%+JKe$9)_-<$XG6A<1JtD$}Wf>Zz*0 zTfE>WutQ5VS$;(wdwaxsW4`&*6t8dp3g8%()P$>up%1L~GIDcQl+vRYs7B&5qZj>^ z_ewX6vKlr7Ql6I03uW9DF@Qdkoi%7&Y~RX1Rw!+1ngTeYN9}@ke#~j5-F13{;#81{ zctiSjW1_0lXA|d2)IZW3(XvYmFFwuHADj=eM#CN9RS9bwSyv}n8^pvc8s{QzHn5|h zz2AJK;hWR$IBritK|x+-=Uv~ttJcSZA47fq`wEj-1kL?1!h}u#kQ&cocg69PvP~8i z7gI}8S6TLuJHueGWAIRLF-T=6QUs$RBPmnizcFjg*1^@97ys6Cu%vn#*3;k_Ty_U3 z(b1el(*<0lOn93Xod!>@6YU$;H5B3OlAz;(eO$1aey@GZ)M2moUNsmN zr}D&c7mraS z3EJ>%SIN^J{~3zmw_m1me;4~AmgJq6cf2tTMyp(Y)A2l~bSWa>HJ6*@{kWSs26@g0 zzdyUk<{5>}s$ZE(Jdsa-_}46fi(X%C^-&fGb+h351wk1WO>rSM5}-!maD7kb;$wl* zX-|`)J%!V=i!XICx;dL3H)U6Zp!e{#nkqH5!nv`fx*gXMv?+p9_dJ7O;(qL^mGV?@ zCA;GL<-XDdS32`7T^&{~ArN%e=XuL)18}2gRP0Z-;OJ9e(+bCt2G4os+^ABUB#k>1 zcMRspRg8#$3wT=Pojw}uJ#=4KnIq<;5;NO0Q0G|gN3KxYe~lsa zu2<=B&FOF=A;8Z4+W0gjA+h;nDm5+$(kB7x92SHzA9Wu(B!I$Q$FnpI9L@N-^iW| 
z=OEgH-TqS7&CT(3AEvnS@0%~`f0{4m5!^XXt~zo+`?BL|I7fY|=b4oB2vOMJL2Ofo z$>Cuh<*Pj4MQsD{fFPyX!wQEgN4ii1EoY3Od>B9CwNu?+$6t{=dXjGyhr zT8Sy$f_s(D-kaQKE&YpEm?jquvgwKY>J9dC$~g0(UYvgl`4SlW*G`p;=Zis{$J(@* zf=AFYbr8cT>Yn$K$)s+D$rLZdE|w<#adNavzS^Bb zLim%>h6jdEQR&h&%jVM*o~F}piJb!W74^Y5Kbc5CU)ZR%6ZtlP;&8pf_^oPP-XR^A z8^i58YuWNtf#cDLa6wgZRs9GiGz$5oHdspn!wI57zCPTjDfOmMa^jRC`Tk&ehI*vI z_@oZ-s#ruWn=#yuUd3@O@5 zHb;(A;7=VwEKUqlWxE$-vu7dBNi{RB?v!J|N#=S(R?~(f;@ZsDQ+moL*CiM&)Q*SC z-Ch%`>Hf@Ka8%#O)m=Qeh}Agj7c3gP6@ps(guThb2zZygPtVhP{41mG+IL7k*g21I zz`M>n_Z}V<`8aEy- zy;XL8CT*cm#_zC6H!yCLK|!5sA39*3qL28rItxBvGmProm_5@WfS{GSLQ7L~4<@ao zK#j$L6`A*Wyd;&(XYzuD%AlBu$<o-M;|ltMlku#m`Jikm zqqFU~dW^gCd@4_()xP7|SFt6AzP_=pzeONe0F1mp5^E`8xJeS5 z%hCko0*-G?f5qIzu)gi{rEpK)V$QV0o|6YhwQL7P2H%3o1^yo|fOBos*kAv((fcF$ zBB6giC4d4>xmIwzy6%v=*<9r!l^fcPwLnL?X@KLR&D!t0=$U}EGF~>0wxQ$P54*dF zvc%7T&gw^=K(BKns_xAhVuo;vJ>5~_5iP?}T}7}sZm4DFh_8*mmr(Dw^JmUy;Zk7} z-T8bl{bq>4Z!<-3(<8z2nSTX`*n~xE68ki^AsgA)j|1v5jP8%_Q+7ToybT8B<>fg# zC7{$0cw+A_zgRObHHR8+1be-%Z44ap@osID?`VYwc%vKFmh(`?qeDy5fumUma!L9& z3oK#N=aHs|gHfg+VO3y_SpL$aDaaP{3BbgB4>qIGXhC`%?#DTzam3i3-#WFpSwMSf zH}!pqrPingL)tOCas45nY5pM)0c7nV8GFPrJ443QGlbQRWyO$VHf|E)1J`$`!)~GM z_-y1K@b2X=O&PYM+Aq9g481Wor`|0nj0HVRrLv^BHpwSk?;2c2D2#m?mK=6b1D(Kl zMAo`fUf@bmrzq+~^Fq|wVYo_ac-X#E%G^Xf^c8UU;E7gC&TDTEG)~T$MI;zNubZ^s zcGAIaz2jI$3r=-rav@j^*-RgSV0?DmU8FM&v+=b+>zHEH}&#A!t zacGPHm;zc_kdlOu>BihapS3Y7p8T}^Daq`5?O?*PU2qME^tMr}5^}&SOb|&h0`l9K z=LG=w0;o%(r<5|Pmvx!KvKb@SVOP{C|p$>Aoxm3BAbVpNA(mT zMvxY^$-xfC=YaG;rkDo+2f6*^%a5lg%varz0!3s@9VHGYAZ(&xEs9LMrg=l6@(@v6 zH}=W8VW_em;Z>Kg*)6S^R<}e?{WZyTU`hKm-RDH7@RHIveXHiggBZYAELfXFv+UKX zI96^gkKWNrfyV6mbH?MICg*Zl8tLj}pCT1-2HDT!T5~8TcJ0$?zZBMhClCWDqbM6X zv?`(8o)33B8{qEqD41JfC~KPyx3QI{=_<0tYQ28BURsT5GIByJRtf~UQt$r87G<)O z%PRdjB3*7wak)WBu?~{1*|ZO?xre^Yr;bg`WdOu6m3wjh`M&b7G|n*2v&H@<;&<@* z^Ey-A1)~(g1Z89oSl;jgTSK8p8c;r&tun{3{u~9+)I<$Zt-ajCHdV$J7q4}GLnQ># z%3TB4HCA_Bo@zCUTUt3aT4~PggY`#!dj)ezMacF6(-?PEc(FmztjymE26)B1PV?2} z8Fb}$9=5^Lp5wmT$AL_x>7E~~VU{|HOq)_6SHSV(nFjy8=Ye|zvYmndAaL7r?J{0} z9u6!c;VMU<5u{Ho*#v*BK;ulv`2zQJ-MF1QaprY>TREX$yuQ1c`#-|N{~5GqivrQ% zBx3#PavwuL6AuNMt@HpML{DYPw3vvAzQjntMAxf14vy4_zNaGzYrO;Cr7qSk(2mm| zetJ;v$&bH=noqsJiNo^04B<2a78+BFjSGG|9bfCz^5konP+N$UOJ26Gs9d%yt1S<| zxA=lxNT|mTni-1zIyGx+Q3f%6A=8roxTmh4lpWqFZfoi8k4OeQd9KHn-FzEg%_A;9 zYi$DEQ!b=N=BV+I&kb8=zo=2|lz&jc&XXv!;j)@3R?(M&6G85s>n0s8O0fX@?$W2Y zk%SZ@ry1I15Q>b?w;qRajNG6Lp5XDT)!{gb#uwE;nwr}(;VYQ>e}^bTTkl-Ra%1Ghl8cnlHGOINSpwKxze=})2WGA zzirPmcW8&AHJV8@C7Q{y!7@SuxOt9iwc{Ojc2@z{;X^;--*9}afu%awj%TLIm^iqP zzQr+!!Y7N_12;BwrmJ-}OKD$W^}R>bRaf6Sj2+&u!#*NV6racQf}+AdpuZ}!9(TSY zujQ-dI;ra;YiC~^O)EWVz6^l=DZv9OBKO?ZGuTQ>O}syjm{n?ckLC)9A%6v*k#0zD z{twVlp@-&s zWQ>HS-&0R7m)(^Z%dfNXO59QthFROYh~2*&WXDJJBgo`PrN>z3bruxK3rx~SSZ6zH z(ptNwp4;KC3Po$96B#H>LLU@guup&aG3N84t>dMeRWoJ40k_0)iTk4ZbdmpO$>lML zFkS~QF;rt(xhduMyArD7N%kX61F*_G?r~Cds-zG*kIN^X$q?X|27FO9xUuk-L2n&n z_yZEsFMyApgB8^TT{n+zQPbP*R?9$3vkAJW3_zl)z|@#Ypi;$5 zSz&GOh)XsVJ9jUGCUnW|z?vI}B~Tl#7x(qkm4C}0?SZC%s=YIZt+mI04?Y7_x_=Ur zS*a5tCc(>R;X9kyPj;EcuKjsl1ZALW*^npB^hHLX^Avcc*=XoKuXNael+ZGn3nx6d z5jmY`U^ekYDG@^Ue5SVimR+%n4&Hsy%PAHJ!}J^nUb8*sBqKc|rZul;25wS03R011 zhuqKRaO4&lipE>+9I;lS@LqDwC!FY@lifKh{@PtySsX*hL(nYTpJZJQ09!`4A=qf18CZ5t&NH^}7h)#q#Al)Ik&VaiuwA^TN_9N-s*+# zj9noEoVDXy%YOByaYhD9Us(YX0L!=pLC{aIRDd!p`Kf68>y;FmQCoYS8;R8Kj(69R z4oAINcw=l)Hn0@E{yMP=E$CMgDP$l`=veg)XnTQdR(T}|7@@CT{n1#PuB?61he0Uiv{gN}lCyZZD~)>nOhvRB zi+DKy+^XbmFrQrg-wikGI|Wl+j|ZztJbT&T{dDE&H^LDKfTcB0(>L+2ZSQlbQPd~t zuJVU9$ujFmncu!fG5xcDcm33z8YB?(3YJ%Pp4}yLyR;4#F4dV&u*k=lHO}dDZz9TR 
z2dbYo*23L^FJ<~y9{%lje2q{m>`n>~nOM*aUb$uY8GZPRIH2t2N#` zO2(nW@OQPVlcscvz){7F?q{eH)N8y7Ba-pY4pBy0M_d&qd|UT20GQLm4GuMRjk)A& zM;1m#Iq{i+o!n`+Eh>5B7R<`5SXM9k%v+yIxeSh;-grukg{RzrOKt#(U=E=#=`h;< zoKIE0&)(|?3viprCfB^s1-AMb6pmhb|4ZZx!-5uOhR^vJJ(f!QD0JDErKh9z0*Y@{ ziWvRFo|Z_QB+;k6N8^1qrdVVIyg>0=Zy2nJGf0J^Oer9&r#~O68+CX`WskAgGY7S2 zq{8Xpb#3*^|AyM|oC(wr>C;2swYIa|M}`w!4`>{KHI?&3@DN{vJ&=Pz+iid#gW9L& zo8fdR?h657$jN4E^~NV#Vk$*-+(8vRA%`mOJyO!u)0ja!V=koinT56Po`Vh6ZN>~X zPe(O{$8KIHcB4dtRd<5i)3>6T->xBp6KO~lx?aCw= z#Nezg-?g0I8D_nVUT_&yYZYoN=1N&3424S2XS>DE{VsL&_c@3;kLO=SB7ZQ^n@n6B zC0hjolfzP;Pcx%@yS^$CF5D<{n3idj2Y*dSwA~tN-CN*mG^{2Yxc5;~JgIvF#U{G( zqg&KtDaqfb+u;4;+ujc)qEf0_bC_v%yl$WPm;ut^b2fVpRc}q5*0w_|*fff?-;Fe? zysZked8%E-nUeeB;FLX0((nG!ijJ$GmT>&I^_|+>Rq^5U#oiZ3?=Zi<9hH=70Fo-> z7u`8{EnZK%UXFg!P!_z-5&|fJXY4mk%Omr}-N=U?n#{Tc_&)-dRPa z23<6Yp6w|Su-dU-Qe_E~4_Ca3Hf{N2=j^jY=^OE#Yc|@ zoHQek)5^W-lU~yZKhiI2m@WP7tDZl=su1LL`2oUC|8p7G&dR-$^s1-(sDA%cYuGQ(VfhYZ zc=OybftEX|uc7{NfU>wlOpRNXjX%G1y(gtG6JL#$yPWx89rpK&$n2@xgi1DL3L7b#j&2MSGYtX{*Q@@qZH@3F;)4ZharMwcUu>0Ax7rIWp6P!q zSUaIaBiQzXH@Sg0w3iRDlbQaiofuxD5cM!w{@Avc`zvuA*!7Pw)ijaH>2bQhf|3N! zYir_CFJrGIorV6t%H+TDZtMeypkeSz!S+w`cv4uz!228lr%V|&PhudWxC!|){!!Kn zP6Z#*f@z}d$y4H0xclr?=W)%&$cY|-FmGsI*-gEtCk`q{JT}a0R^GcWJ(f*!jYc{2 z?QsE_k@;#TjrVF#xGUz9`C>Q@YV6tfbbp*#ulnxFpTxn#60G0jta8p)>VMT18Ui$i z!%9T<~_i=r`lx~4`CIdm-avE)p9mjMMlBM z7nl8}J`LF;Lzj6I@1u=BM_v{wgr4Z8UR?Tuct6)Dx}KI<)2s*LMXF^NFa7@rhOtFr z74k1bNrWah9-*x6u(Lc?}U9qoWd2`>tHI%H2KWJtI362ZbIt4!9z+Oi>tn9K+w>1vN_T81S zhA$m%u~|bk)R8jiw#?v4oCM=`6QMjjo(<&#>4`r2?QVLA1e*fD2+aKAYDK-}P#~3< z$Z(w%LuulY=O-K30&_7u{B*ljE%Z`*+dFP|y-K6JRT1BMRMr8253gyXj#Ms>yu=vp zqy^RC;@m=a(TZsm{-*<;{iUyw-DBg_&w3%!g#tpQwxB#4MTHen5sId1w!fzmnHR2^$1UaoYN8^IMH*@t}icqiXP?4eb zLElWSvjT6(fT)5p$7f3V&WwbXSDJ4aNvGwIfD0tOX^$fLdt(x%?P~J9YjMxNNX~-IO=$p6$lrv@G-QE8_R~j zDcAFdaau_Fg}=PO3JBOiu_aDlx!rt~J1oIecq`Pgs+GdEO~XlE;zom7fN~KomRpkL zQ4$tPRve|Z^~-4y{89OWv~@(IZu=XP+M&@(n-=C{6)3x{XgEEkI7m^BJ2cLV?$fLD z;~l`D^6?x=;9`QNn*Q&s^F<-*&VUwNvaOok{mplTPKpanIql285#wDbj!v4ndSWmN zVCUG@Z#lzv%GBJJB^|%$vXCt?B^Wo|cDdFzW$oGGObiB~b%?EY>WXn)&sD&MKfBEO zZ)ku2dud`z`Ugn)uoBCW<#sLd z5h-AmkZ(5p{2kAqek{#vn{^3I959mkWiIlaWByEsY__kzZmnz&mr|mKP#x21f75wsq>D4?p}HkZ&J1{5eUoO_+6pwsp2=mjcLMK>4)7hE-y0en0w8nSS<&#v}Io(38qVX?dSh?7**-o{xZqLqaL zsd@E9f2ie2PP``z@52Q<$XjAQD&JXmsA#?H%Pl5^2ImV9u4nnyw(I8}s;&(*h7L zSaY+XBV$dOLELS~8EIW@@6~6V=a5o50uy{y@aWB1iMll;6JQu_ohAHr@!o^<9Jzr- zWzp*~CFYu9q&zxgtZ;*|zxSU&aQUJ~=Bda@nX*oZq}h~IxP2n?(>*%i=P;|N#U~_% z3C3%W2(5vts$X0 zQe=j=YM&)LSh_0-^)vt3dPVrcKl&#mOjmXBI~9aiKq&U>z4m3NRVQ0irtFN0mOIm@ zQSI$VZzd9#;vvvYt-*#Hp*N1isA*2{S5|KKi(l^6{eBrxu4mk1f=Cb_`y6Us-qDa* z!K~F!<>2=b$>;X6Q3&gOV8 z@l+t|-BPVbKy0A@4h4y0%&8OUBehy+cofaS&x+x_R@X?i%wlf*$%6z{dl)i00*SKv zap+YJbnw+OM^5LqNj6Zg_?*SjK)cp$*2*j6%lswwgY!b?Z{;+cS0bovlW9-b|lBi==US8m@W@dx-^f;7*&^u?0JyB8;mgRZ)gdFvMw-%LS z$O@1o4Ky&FG;Tv2=`)f$D{zw9o%awNTEC~17*HJv_MEPV(v$^XsyebK&ZaK zM-3*zlqF9$rG!j2EzgE%`j?62*|-$qC)&PnYr%$gQBd(?Wr8SZBB$Zbe!e} z5FWVKA{uDqkC&mA^DrB%6;zR51>Z(uDq&kZ++LfF)aj!$=ysF;T@Y!ywX&NwaDb=PVU^01;L=CIJm*U=VCBS zYVUP4b@mxsyEM|e?N@}|$C?*kY-|y0AE`F4`|R9ouZ;>Zc4U$qs~2vRWb8!-NgvjPas!SB7~t7M7y z)!GBW18r4_IX_|^#Rx>CVRU%kMv@BMG4OVv(T4JI0b-5!lTRZe;nhe>8L4<-%8pdV z=?$Cr5o_)hJHDgt<@=Y>(!A-n9Isb=Drd<2{VjTG*>Ck7_1Vys94wN01!4s0?{b@o zG0Lp=U8FwS2u+^E%uo7^50QRZ{IV+SM|%7wVEC3d3$|hvxX$>8NY;d+G8ftr zNmYRnmTYp9en1ufdZbpwT_x)|jW++}BHv6kS>^4hd($O5uS_U45W@q^UyIBGF=u0a zJ9Vr4fETedId$?D{u3{*oZ8VYWRWQ}Hd`sW8&8zPR8Cwk+t9*G*8lmiN>H2(t6jI`{?_bI+XqPGSc(BoSzLxLa)d({Ec_?t~>Vjf-iUDBp=$H8V3)t z4_L{g!!eA~|7%9B1mYk1g#Zmty945?hF9Gc7K3+~cUq2D^ 
zv%kNF$n;G&qH93+mP~`-X_Cv`YK^RPM5_B+oH@F4VuAu)XXIC(YFtYtCQF$%>jJ_d zRcqUC-QlQTYZ#r7hhoc_Z&(&EAR)zUb7o>Z`F}%XZe|zg1%g>P$twOlzxRKN7LL*8 zBnnsFIl~2v5(PZ>GN2Km(c>zmwkd?ug@h4J)NMD>R@fO<qJ%c%~ z5Y&YsX2bpxZ+7_R^!yndJx10&GG3>xuhl!W3Qvr2?7YemBE?;PLZFi<(Y8781oHw; zm?@y4wzG_zP8RYdpYr1zTnt4gi=6f(mN!5pCqV(?DLvAVH<>o zUVr;dCws0LFH!3Q(k)GpYjd3>@#|yx29SOjHC+RO883oQ%Mod`%*QJ{e(HXv*wJWe z=aYCfTe`%x73>x(=@FOVCwLNgl*8_`1rOMvRFz>L<)73(UvEE5C%f#?UO1i9DK3u# zz~pe$WFv5Ng48uO5nlDJvq&%uWP&%5`;{P%&voycPX^Ox^$KaU)79CXJ#rq1c5OYRM%)Lj zL7r>S6Nzre-YLsMusgk&EMM^P5@)2$Bdf!|A8uxQrZZu=DHix;Io1Ng!0w?Jxtkzb#*o>Ct-Di>igGx(;{rVJe21v%uF0os@S`XAd%@|DT?!Og zULhn~As1`*6cooS<~EFont3ZR5fNA7L=%p_m~OkUrWDb>(MzOa|4(MtoXz#0KMvLS z|35`T#Xm(u$a`aw7>?+3QV0GB>}*TmKji=RUTjL8tnJmLJ+4;<#iQ;$l7$UE<;+ICD+IFvPhri5 ze(NFsNiLRhsca_uTrHg2gTXlTwTG-AwMut1?fzG>$iSQK-kPIP?h)~&Ob+~QQ`ZUo z9ZHY2huK68pxHY~lUe{E)U4NAqAHPK-@aM_d!_7he;ap?a!&$Aw9A zaO#V1`5w>NK~GgK;6z`4HJ!eVS7{y2Z{k0=MiC$PO)!jFL?mVMXb~=!kjy#dOf}a7 z6u7ge-Q#cWWG;fk+v3Hl?V1`aBxF0*y}-tAJJb<}UW;!xz+>{=Wzx%eqJgWtgjgnq zvrM%u+8aDkX-JNXj*P=|rliB=!2Z%KhmBW=OB=rJE-=&cq#Y(0!;SEow|Cy6@$zEl z0zB(L2(D*=BD-te=WLv`<2PM{5D<1{=N4%tl}Lu`DxUon&ap?T6mrI$hqc=*o9t`z zE$UE@{q3)o{KcEnNyL|JEv~NNq5i#HQrOsMNC=u&j>7Zm4e~4IUS;}@lzlgu482=f z)uDytqtCwkIi4;EL(N@(p7F`+?KOg+1MU`UCbf6^%(XvLIFgqcf&-{K=7l+jWfdas zea)DI2b$8!K3E72kBInqo&$!dlgX!Y)(VjRRg0tw)@kgBrRF&>oBj5Sx`8TtrYz8} z9Nt3ss~{jZ`Wu3SqX$BBtsuBQb-FeM0z*2_99DaeaZe_sFks6#Q@umt(5NMaI^16H z&SSa;`+QCq5M5M8iBr?k1`|)mrcKIzX(L{M0L`JqCncn$$*u^vpI5cZ*xk7s0x3UPIU{J>#$Lr2K%3?Nrs9=qj7Fw&ZCt?JuL5}6BsUT{ z&A}FyzDH=yd2RZYI9Acx=K*UFwCP&hb}sr4rDJaZ1zPE$R_`2~37$A2y##l6M-lS)H|?I?ISS$SIe5*^VK}(Fp7PswI7Z?BI{A6ZJ&E;d(r#++2p~Qsmx_-$v#q;_1B(luAo&SbEX8aF-?ANz$ zl;*5gRh4BUQ7A?fE{T$)y_uZ4&wa-M$@B0X3o|om0SDnvzVk@n=gXg6O1yMlngC4@ zDGkflY}5KV)1I0iZUKZbn7;1Qe%=?JF9DKO$=+Ih4 zI9c2ar&IRGg!4XAGRKiDcnErN0u#C&=j?N})mOUA@Zg=%B!XdSk`TbCL02%(2f?xS z=It&>!>vW)(OSGJ%VMN|f(V?=8-sk!#+!BJ2s*}&JFz4Un9L~+N3>EuTdtnF4MGo? 
zT$W5C79!$PKp&2-T7QP@c;;d4G0@`d$!9=XH60D1A!B+|N)M_aD&=6~#(r|D*253{ zE^%O4xqLeoIRE|vi#cI(rf*=ZpaD@-H~0kI%O0s#R&}(b#nxU^Ma}Ei*+A_P0oVaN z5bEl#vWgXP5b-8~lS#YElfVP_B?&Tmyn>-TN?wCNLASb0O6%iV%!KD<^G`(d06EQj zTZucq8!RX7b(F(`(t^Bq8#akzNrc6GCaJ)m+8{%Q>o1}@p~w*RXqJr)x+N@|!Mo!sHu2S4(RGz73#T_iz8Yv;%(7 zqpm?)0iHF26Bc#On$+azxDz-LDUl~*s#IiX2iK%7ah_u3FArQG_i~l@MIc${>58BB z7QcBKaVM9SvTmCTkJ}!!v&h*<{H6=Hu5GXBlCr&RVQ}2H{n%X;02>-I>M;h&f?*5y zm$-3Wd|w4B>BRAY1$~)U?vDWevTn+f32QM2l{QCLmUMLkR`@bf4WYhYzG;ZcI;QRC z830@+v25|cpA|5}7aEcJp( zmo9q9GE7Dg#{T5cnp~Vc)-1eahQ27wIywH*&Ctz!Mi)=g@r~#2!8e5IOflRS|3tW* z@^pe{e75FdvrTtn{K_9k9{aQ2alw2233PkUD&H^|oov=u^@y&tBD4mx$#6%E&7pl~ zw`uoP%k6mYBAE^sn+#7?y7aJplJP!WPm`${#5mbs-U%T1boKtpVH1G7ymK=(l(uh0!3bNkMBx$rmQKaaFeiy!L&GH4G!VMe((C2O$MgmD)n0!?+t*4tk` zDy{2$`-0%JbtO_~_qxX88(7VlrQEX#CneWOnRC+N#KOBg>eu+S^)$nTuHkwZ?_4`O zgcX1NrJw0MCl%@l3WQ*8S-GIX&duba?2MY}6@?2vt6oF8rvrvOT&FB~ zi95RT|KRJag4+E0XdT>&wODaVDK0I=Ay^9(ch^$fg1cJ;YAplWg3wxdb3nH*Ahg~pXxCc(E=wEEQE))wKhq)AyUP+5h3 zV>pHaUg3u60S_+DHP$$2HLHZPvH&{h0r`tPVz6j5UT++md)dUsyLJkmW6E8;)_}|# ziF!W$j1O6#Rx>f~9g~P~UD>?jhR-Y`{qydPD~2QX^paBFyJ(o6ROaD|!p_?>-%tpfTh3+V2(T0pdkA z<_IFluJ)sFWK#Pc_E>bIId=e~4I5Fj4EPRwLcSmzTIZbM&c_NT{rP8afK$e_g9iLC zu=`M*feLhN2eAS=jhsi;(JP)|M%WA$4f&xtcb3$xlpNr^T}{OwM_+oydGgYCN_di| zv*(NT!#B7_9C!dOQv5d#iGE#KB{*Fx1^;vNdE8~;MKdR_G>>s=gwHR$Q8=cv22R|Z zN?7Emt6AvrM`_iE(sfG_VuqHIB_h1ZAmn90X5P+M0nD^to?n@-En|JZF}kMjcT0;H zCbTN=FzAf;5DxbW>EBQc?26fCcu@U3p<>+ks$z!Tqa#ZyCsARioi*&pE!v733zbs| zc?j9=T=NfXy5MM(t~#2+W$^K8$z5%*!)t`QnQEOe)NJb&QUvKu*R{%~6r_2Ywh_e1 zG0lL(_UR4u2pWpf%`N1z(yo5Io$1mvl*k$_4!XIyk@g+_6U`Bk^B2h*Y^RjU1I~1A z{cuB9^F&0o;8~m?U5h}2{(aJa$S*Db_K4qjz`5Tbd~I#*2uhlSmKEUvxmTG9?F$LN zmc4jzXu2KOHat)rdmmWf3`A{17P62bF#r8`zs04(&~1c5Bv(PKyM)X|{)1X`0yRdj zy4cPF09*_g)(E~ffJbI|m#2w)wgWXkH^py>wjTBOti9^Z?^y_W7V!Vjr zdXH`@i?tj@K=$1zrq&mc0)sc9{FOn)f>%gf8lXOt5SF_sMWv4+6fLiR`!9Kzp&nD4EsiCFFn%-{Tl(D@lt;Sh_t{$=+%9CB&zCP{(K5@qm3t z-N$o`DDyssebkd*Ci0KqJ}WL8A9VX(Yq;hzuRp0I>r*z?QngO5bIO^*-3l7LIIrGx zfg1#z$nO+l-j7UW-zsDzld`|JMq>EqPjBN!JV|M@&0s{@BIjR}JZCzj7($*+EpZre z;qrUgSDRENeCfrs#nl;92D~#R7xq_XqKzN=lgwOVnc7L62&K+f8fwrh5czTF^~LxD zKe;tYZTNvx{^?=Im0J;^)&rE&rC`~ZhAR!{En@)34Pcd|()d+Koo+(D{$xq?_RnxB zS(;dIr7@$t=LNYVHD|LfZ)hbCU4vuw{(S@BB?%knM3NB4YNxl21;{bkF2^*{@}zq; z$te34MiX6-2an}wP~tn{hLEOKR9!H@^sDnUDPtRAmp(v~u)EVZVj}}0^w=hJX^>16 zKWO&o_RdNHbL$-!^Kw@n$>S0s{2Ro=iVcmXgT8XA0#dAyEdcN9t?ja$wr3S42e~JB z+~~As#P?wFEq#oAu-;><-C;!Tn5NYYH9z&`)a}Q6u-2~fzCr2l$oOGGh>XZ@KD5 z_YE6o<6WIhHIUt0o1H{l)q+gkR)TOZ_Zh9xNV1S~!Tb!5bO9Rv2fs|?9F`0rxIRj6 zA;w1yH2n#}Fw9*N_`z+bJ9V9sS!$|p$4cM~;u;gGxak=-_X0uyzVqHW@+@%d#K0Hg zb;t*T4+d@}fiHJk6yrL$-R{;~3Tu`HTznzJOl%=-s(FMZ&w1K{@S~L!%xFEA%&LJ} zIQfh_9NJv@FH7)#5>HVdGM38a$h)CcljqUCEZLHhP-8KKNsyH{X0I)yNwB)Ko04Za zS&}z-6jq;!=bvE2Cn0)>%PXZra+iD?&c)+k{4;vDoAg1Z6Yx9q1Vpl$=E(3X+`M=a zs^h8yiMI8e*@k@2p(}|^xfE<}T61c^leMOR8@EgpGZ>VqlEx=(VS5*S`d1py;)h<9 zp%scmoPUI9o+%1#EQZ9o%uvHyYFr;m*|Ax?`a~Oh^mdQ%LQ|43;_4M_ zVrxKqdC#@b1=ndG{bEzufQNF<4JiX>fjz6Ku--<1@i{+a#^f4P-(MJ%6&VM>!muwNvHQ!7u3~bUQRzZGhS#j-K4B&1l2BwVMlEBk_){j`Rd2X zaRBZ%tx)-En3;FRkNIgK59iZb7KSmmUgC@ z+3ocCnjsqrPwk!=17jbeMt3o5hm%_=pVG)dE4Qg;y2bT;ed|UYuum7+J4%_hS1QOl z!t_>a5`Bltf=LfIi7Qd5BJ&n~lIm3ppQCzQ4-1D!KJ?csh-=fn&I|Lm_J-Ajb9W?j z;hA4SkrAM*0#F9@TZh}`M_I^6ZEO$S;n6uU!BVVoZ)a;d>bv!^U0;B`?aR`8b>Jw zL038qJfy!4c}fwAmii4%^QJ*2A`7z8CWnKk}h^(qs9Ctl?Yd z2Q{t@L@?({!o071sar9b0s@kwl+Hax=;bcBVd%5)tFgxK!xLUbAs|-cBpf8sEVZxv z1Q!pa+RZ<<$Cr)L2Gd#mPGi8DJ~NL7?y63tFUK~khUuZlRgD*X$fpdgD!EmBoJ3a&hyzPCr4C3A9avEIVUKN3kc5NE-u`p>B~Orqc4# zMtg6ye|JRZQH@K5AOfV}hTVIdHA4jBW7T1U&`ld~90hl2eXkoeg`M*N!G3(VKzY#b 
[base85-encoded binary patch payload omitted; no human-readable content]
zMk=&6=*5x*<=ySR+J7+LnzBCJ6M_-~q|I;m-o;a*UE(IE7+> zL%BS&^4Xa;J7B{@(G2I4ONRmeu+NR5gY|)3zF`I3a4rez^CPMHq;< zXsqF@>dP1V9+p!$kX#T(bMRFN(1A!_GFbODHDSfBT6P2(dgk*B*pQok|G4*2jT(Eu zzVCj~4;MUujr4x1Q`TP3?lt#3OrTVB#gK=A=-{W^ShbMGUw!X`L`a4enX}zaY7ou3NcZaO!#Z8 z6u|95DpxmZ2am-ODOLN{0p=EljQ~_a(U()Km(05|PvL0w7hzWmn7IMhePuOdY18@a z%yItgH&mDi>^Ht|S-od?mC5?$uC0|ZWm!TKG48gmyq1aX{ZDUWO4izv8rCZ6aOvxhq&*Kwa9kIAZ}~RO z(R2NeS_V%Rx--V-6Jt3Ru!)uMRF|%Wc6d zH0H=lvn2Zo1-lqHSZR^JJ*ub{2Ee_CUUFLQ(h*$`6 zbMoEEgExQiwnp;|Ua%LF?41vvwQFLAFP}eUUl#A?n(^3coMg54X^>U_7+} z)gIAFe$$F^c_Tg_AbxUd$0*YhFIccRBG6Q8Mjd0vU}b29l6V>4pgL@tKqxXi`YSp6 z+i@{eUzPxhIXNCe69*S;g*Rd@-w(q_#9@SWb|Wz_x<~lKM8I?YMM#6emboQVLrvz% zfyI+l7Wnb@lwR7$4l2}?VLd{UbFB!^J%s@= zo_D1epZ4HBcvE_7BA)$eK&IqL%ccADe2m)&uqNj`Hq1i;?)xr#+TIPqv*eOI*HQrh}cYKpmk37TDqDH;7VU z;v)c}p<$qD+JL+gq$`<^;RT=9v&GRov{ErJX`^$(0UiqM&p}sp>xXjpi9YCFUiUel z&$iL&EA{yK<412L6`CR=q)lUARx@>-Rd7bGQ_S#{&}7u@7^$6!^lp{PX55GqOJ9vF*MyMqtFdh)(YpoLUELb6ak}c7Zz$+vr1@5 z_BlGFqkVislrs_bp{Y2T^e84VHQaFumm(&TW5s1ID0h*reh8FRiAUwfIQryV%}JaI zdIps#e;K97Js??`U2LjLUwkYD__B`T1!#1u4V#K|M$Rb$)s8G-&=f$%#QgQkervbI zEc#i+(1Hd@Hwl%%rF#je-??b6i=@qslEQu_AaOKyq1=r-KDYZfk?*dNNyOj@u)jvZ zSxBLhDVXtkvWK%e#vaeWkly26JeE}a0M&d*WdZb8d!VLTDm`IKyjuJh7QnvUx z4Hcx*m_aF86A*HNGni(N<&+&Kv2K{hW%jmJ4-q?`Iz$>bzyyOKYN3gSm)O98;j)Rlh>T&e1jniO-pD`7A_W7-tF|Pt4>VaoV zpV}A*J|9K`*mlD5c-bwC>EC9}@+?lAl#Iz5lF6}OSO{B|D_r7t&685ZF4}CxES7*w ziBB9?J9T!kDo6wHE+f0kCIWIV^dxV2EJqT*>gn9YS`oKG7T!)9Cp@;u9B+9`jr-BZ z)CxPg>9L_E%?C`h;{Dn9u3w(bY=nY^2W(yA!-fDGKv|d9E{+29=uTHr^-*HYj>F)& zQ~84Ui6kR_8o{U1e0eGDW80tKy5IGC^C|MPp>`)eC8_c z*klQ9`_eFUlB>x$^VJjW?~su%RB3At{c^3LDJfz~#|X~?`VuCN=1S){$NDp-TSB1{ zS@KVFb`r+|kO1@V$2yq3Eg>;%@Jxy9~DhDSJ9++8p=9kC1)e|lzCsg5zXIfBb`7{0;-b* zU6(`JogUV!>@rL6QpH-J7Zx1E>gkISdi{g@TQ}BWavAHacgT-<;~Ap@O|Q`r$++6lW@cVzcH>OP+`a5o8|;CvY`Lm= zBSrwlV+**<3sB@LvGAPxW_b@i9c|e&hHN8?a{{#y9@>Jw1OK&^NZx|8bDB+x`crC#650v!j4|7Q2Cgfz?czLB!S_ zSJYP?PEI`3ep|U?Tv%+nIEP4>Gs)9U{Mw&R(E1Iv1Za&+GcBliY=Wnd>96ePh&NKh zULwDgaMI!%6tk?P>{&nDKZTPRpi6~DYD;O^$bI^>z!9=!amn9~|NnS=AtTHWi%Q(-`vnBZZu%H6P{b%&16%|&PK-wr!MrK^nt38Z-? 
zt|3O4YjfGF`mPC+&%$%0?iQYM=eRMoI72;K;*^oj2%ga+6AVH@!t@xrym@D>l<4;n zAzAq2)C#zQ**Rr@_5TK=H5-Q&oOT9gMn7G_GNLbKW}uijh0L4bzGdB&5^>+cgL zuncLB$0aM_;w$>pR80g=9v3 z%07wQx=Jhx)Q)X1QT>?bF9A3)iu8O8&U&oNDJ3ij#WGxeEsgffOiIi)LHI5q%+(_K zS<1wt+QVL&o8LU#{QAcn2|(@;mfjT~9b!TG5yQ1#g9q0>51mY=L{TJS!#U|`H{8~W z|I%?KvL>ixZ@C9CwII6w2h2}k7RHc@W};F6pjE$l{W^3Y`T`-$pB%lV=G60{+tK_@ z^91-lmC}lP?T}R=S~W{`Q2f=lKH!^znr(rqQua!eqy9p{6jG4Fb#bzzYLwCBi&Np3 z;9P1#?*W~ROuiZD;*|r_v!xKU$iUPNq@ArF_JF_EqWhS9auk_@-i&D(*$%*R2ZNp} zca0wJ;-(l3BnvBSYy?;CAiEk>sH0(MBuEUNF*_NJw-Zz0r-?8ol>>5aLks@Z%(s89 zv0{=XBZPwvs*@-9#9{0boL{VNoQ%RNt_Kc**UJT4c^S~LcEVf$t}M}73#Gy((~}Q# zkg`Op+_b+8d|OCy>C#Fw0O7Q~$-^8mNTZ}qiw^2uP|m9Zd=ulrY3}Gt10lM6LZ@q& z!9CCWUMsJY9hEj6es`m791tvO_2jdnu<*;eRi;BA=eF0EzebWZ;{`sRx%eA}cJDC5 z{RIHqDB7-@I!UW4Qcasc_C8gJ`7KdbgAVm*R@-CDkDHHJ62q0q7Zo_5cD6NJQwu>X zRyLsTh$#x+C!TU+(ng#CdGKf6?@&heRP2ELN_e=IqX#rHm|+t)UZPa=#AWa> z!j35oWJ;*uIREoB)eSPA_Vb>7B^9zf7F5$co|z!zx@vC*y(-%Pab9{w9<(xzSS&vRp4S`OCy{Oz(t2iu@sX z7YKX5;pt^LUsI78Q0`37!*MFl?c-K~(JxX%-Cq3)So?~%4 zi7%9>QzxF80K2u$O-{{GR)7m3b%?g!O-x z@4aGo#)W@zT+v08-d9;bb4abT6;UwWKOd^()%dD7R?CE7X&HXBdH>Ky_)+09=hqC=gV%<_*4+uxkbc?+zy7k_pzUE{3+~j)T43pHJfjo_7 zm0-vMo5VYk_1;y1{So>MzA4T{Hq8n4&lDMcFJQ`qR0`?9=C{&HvHT zzNOAM4WN+wto#R3mJEzQ%Zks~ejmDID`N~MQmEu7cD_EGuzFw}KQhP}1#g|6fo6cL zs4PI$g6wRVNSUs`O=kpNdfnU}iaWZGfyF$sQf;8WN*vGO87 z|25S{5EQfFk6LIQ&Z9nMz_V6&LXby*YHNJX{C*0^jj6?!h z`OqkrueC;c){8j8InIGuJHlws?a??~kW3pclE!N9P#V$Q74s{-1IJ(N5 zIDJf3K>~RA^C*ILuW=M{l00@SV(L=H9SR(qVrbF65FglU8PmJ9mTr8=AMJecYAIDQ( zhYp5F6dR1y(f zLX7gn+iVRdFKVNcJ~LYVEWAQmayw9aSKMj-N-XZ&Nz#^J*&L^tru*y|%tvH{<}?~O zhk5Ohsn){9jrBcT)0y_U8da77g&696eU3mDbOR;UE(p7Y@m&JEbWVR@7)_zcae*QV z%+e0RLkCc-`;hA(tUaZ3yMxpRkUNqHDf>|Hu)m)UYrMZETCyqWudacyM{H-J&5P7; z)mxxf;R=^I&0M--D}Y~|cXc#knt!gW4ShAok*tf3VAf!zgtSrKKIwH{PQ|&x<|&E20I{7-!uDPXDqtS zPEjg;BD|lwPVQ<;eYY{0U94dwnn$@AutZ2 zaE^H}RBQ;jc9z5WlpsXGazq+yVo&n?YvwJnDZW7V={x_MXIPuIp}8;MnS#C~eJ7Qf z0EqG{C;`BZ!;A#E;B6-fzsCz*8nkU#uL3hCv97(HSu>o!;O#0199yvONiQEUa;tAa zt{z#o>z~MKt3R+k)14F@N3743s92~-wk9eD=ex#M@w4$joKih_bJg$-QG|G^3RSGYNv zsp>wg^t1-Cz}z20IBY89-tKOsnwJoo8Q1BX5_>VQysAWkTBDi`b4lLFocS61VJF)C z{p-UKi8ZfRn`|rD|8NZZcX!1BxG|n?f6y@}={8Tz=a46`T~&&GV!zFC?4t(`AOO(l zY6Vty3Yo5OLFf}h7txJeh~jJNG4ryr{Rr3FuDBm1;?H+ZUKFG9Dk4;vf<@FJaI{|T90m02w0Pwh5o;X+c9u_U>DwqOq}UjIv9L+v|S$9 z-;!XWLq_tOf@(mBS6co_UuEcD{QofL6OGqy{-bw0Jvy&z$w0`EHS?Oail8^40tW`h zmE=}jcHfaO2p>PZ@Y2I;Ld9+1E)zO`lG}TynR(Gvy(biswJUOe_-+40GXB0p!t@%x zIYUtG`H}T|JhC>TN_g5t_sbRx0&xpKc%w3U-C4)U6)UkYTfhc-^o|zULh;w+){&3!Dm!LOUf+w6+|5t~IIL zAkz_%3dw=ZkNl>-uq^upam}5!Kut4(P!Gf_hSWym=sVvMksS19*|W^w0c$A~uKXD9 zy)^O7fWG*g>CC$j_mIc8T@mLYq@xoAW1RRuwSQ~iwB9d0)yZcSz1&^Qr@ou$*xz7u zO)9U}jWm<`vEeSY&i$l-#PU-L)8ex9N<6qXS4^u}sQwV?i`ypYh>}fb%7zByYu2Kc zqhB+%U1>m0VyeHJbbf0n;CWDm{`w+#5Tf9=J|0Y2BKds`dJoo~AU{bT$)lfR%1#FJItY6i< zs}uCJV~&XxM#DXpWSxEnD1GNWRxpsmM*tIAeFkd7zR3*DwCy{`#C~_KoSA73>P!&` ztPA7f2fS&+Sa!wu65nln^r-U9eOmsH#H2fa$%hudfw*5}90X+}J%a0|y0wKiAy4Mh zb`pOPEcH1r$TZ+q3ygb+Web!W<-ovqMe^5*9C~xdm6z=@@{v+@5!^3-8<$;q`JyqWrHlz>v^OWmCC3zidUx-wMIU8}x!1Qfj7@_g=up)>flZg%0ruT1!0G+@@X zmng!TfU>MxSby(o`Hs_ml>}i=SSC|9z349%`5gT^!R3(w+E4c0|EgokxkL`-y z+(NQ--)9g3lcs;YoE@6UR1TVAyUKLlo8pjPDX6`&EGWs!>}UMBGY%~h++2G|NJ3jK z>jw0XgBA}~ZG?KkPx9B%w}7T|Z9SS!FOitH$=?`pXbKhsM4d;&p?n08-$gkZxQlWX z?xTH2^S|%|>v6DG1~G8SaYa!IzrQUtpHGogp-cEz9&Jb_w9t7aW+3jc*8C2L0Is?w zMo8*pjB=@+!~mrTZ#b=jDVL>kObx}zt7iR0>2l3_dN7n@A+`1HC=FCz#|tR~g0`wa zdZ24B0sCo!cP;Oz3R=FEqI`iYL}<2Y{Ho2BGY%g(B%p!rTB?q?JXs*GzHR#x%xIRG zTCke=@2(;AK@st2>)4!wkT6({S`Oas^T;)~oo_j|l&CoZ?aI??i(wmZI!7Xn`#Z4z 
zA;h8F1YrL%o{N`-wqQksCTn4_jRbU64&eS2j$=)&-qY*zCtjQ^z+m@EpzN^N$VL;b z(fg&#*-XUyoM8-V#nk{zrdG*wngeCX1%x}G9cSPvNA?+a#WosYy9g?CzZc6W?aZmj=P-L!w~r*} zC;CJ?j-47eLJiB(h<4$kQ^u)JUA!4ANcR6~r1^iWblK!J5)U~@CgK$Wic;)EV zkxiyrgwQ~W8f_yZBL!V_@HzLqv*o2gR|r@F3&L_)sSy11c&VAWS>foAzWJ&kZC6BO z6Gaz`$KT4zq{DD8)4NoQbF`uk>Sf5a*QRdElFkrrX-(ijGyCUqeW-HOaF%IpouO32 z=ot>}jPGuGmgbd~I0Gp)T^)(~`3(Fn23$k;vX#(iu5jK6cO*quZK#h(UVL2X!E5?2 z(Vz;VzW0)t)Du*f^QBq0*Tpg(l3S0L0aEQ%o}-{NM6QUxucj>_YzNjYcJ3Cv_y#L` z)ZwUbf3SX5smAYkFH&&W8eJCln?wuKwTn4y$##x1x*YTtzcjy_$i9+z$Pmp{*uCBY zZ0Oh=M`|CnA{ANug!P=MMTI7>l~6V%bg5jT#hY&*^;~zpXk_ zlQG@`G1lJVDuylTg+A+fv2SYd9;9dX?!883_mMaUI;Ly5j;)zC&N6276Grrvz%=k@ z6(@dyb}Au;jIt=sgR}V!uBQ(>Q{Fetg}sQoJC?x+ABp$T=;is{akXr@4}_MtXkAAg zRAt?f9B(l-gK^FL@B^9gmJd^jBz_xKSL3-s|1!GI@0-%^HIeK7ZdNX!ZgruA$VUq~ zpc8|XPA@k?W-MmerK11A9Uf4YxDF}$4L|tI1-f|Hsu~ABV>?vFYi~lr^c_c?U;%!& zz_cA+Gu~SZoG-Y`jT`NaZUfIk_|cklB0AdQ5+UK*1X9N~^B)Ga$v8m4h~%;pp}<}u z0R8fp?o<743jD6Q?+|T;=&MwPL$>*5S%`Cw`=N>L?l5D?+nv0KK#DcCHQ+xP*Ah@2 z@2NVGY`=2`e>cUHbO@36ja(pB=gk}cUClO3R~B-R4fGdO#*p&h!w?E_ti-|1w=&{g zNY2Srq1y8O3pB;?(%$`va(DeT4K|T|@e4k0y7s~8;c z3CHoDMUxN$rV>G>$(bZeEvaQqOIRMlok4@5(rs5Xu!mho6`h*U@~&R z8mGvi(VX5eZSy_tdab3OqLk)YCEt-Y#E92ins$hn>uFBAFN!W+KI8~)4xtU1)^4<| zV3=L%$s z@65m+Es#Ajvs<3H1%Z^8qMMfT3boNh=dp>&QbxLbmI2*E8RxJ!Tpf(3UA5}@$HT@yUPg1fuB z1P$(9xKp@P0Y$;o*UxnK^t8q{-8*OH7VtT;LkWff@rpLnxr;p<9lZFmXB{Uos1a#LDjTxrQz!{{ zXWmFfMT+WHsJ(jwBX^fff(z&^R&Dujc5+nb^4eC&h6b2~P?05F+5aRw6dhZkmYTQpo473Bd zJ)WONp6l>(CD8^Tz4Y7GSh&AGtoY`6SaEgnOZ8~-&!3_`pdiBn_VY%~gS`mMn6Xs#^0%0O?wNMf`A zSojvTgQx+zXuCvB*q&|>-2U*3uN?=a;w~y9yRRDj6?jz}l*_sUN^EAOp>zbRP>HJO z3*}AskLa|fN|@PG<19PkYt~BfPV59mef$JEfIpPdg!i<_AUw7o?3^ZBMIz?-iU~T| z6?iOj;^_q1OQnW|-v8>!<~H2LbkUnIP41)~Lut_M9o@63H_( zHxe_)xK6L0(`hu&QXf}XVssC_sr7e$FuRimZ+o*(>2@7Pm4=tX9-+k5XG8wJ`$geS;QRWb$D;q=?$TM`Fr>Mt)ht+3>i z$k-Rx87g*#1Nora@9fRVHF4OJ#6aB&A9H@4G9fI6ptDy62F{u-v;_57Y~NTVf>t|E zoXLZzkwSB6X`3K^#|gi;c%Pix`Y(IRp@}+eI{MrN1wEDZJ@~9b;axhVY){#WxNvLKj+lmBKLC*v4XO#vHpCfzE~ zL*!3YU)56>h$Bmg6W%nPRV!8DbN-hNRPp_`{^UjidIm!%>;Z+Mm>ztC4`Hh}G*0c! 
z1TB=2Ji*p86R5#r7k`5&MC#~$Uc@qGtbKr(uSVyAHv-a89l~kFLV)M?njYbbZci+m zR?pG9S+lL~1X2^7pgh3eRT1IAOxY^G4&DHGa_|vW5<0IPD46wO0 zHTJ#c`EgGl-|wSWg9W zUnf9nXnEH}c1d3Lq4Mjvc9?MVrFDxUsYBc&RT#Aowz0dNx)5f~Rv!wjUb@&zr}RmY zJz1I+)y@6KD)58u!Q8@5=L&)9>fw4IJ7^zi&M;t&`m70z_^G+wxs z*Af2||0l7bL>I2c%LTgL zio1`xqeJDa+5slOmM@J~+6;D_h~eJI7W19EAn-NG+?r9)h~R}^!N_=!KEO=3O zl&Dv4rR;WtwVP->r2sk!F>_Z!|`7PqN-{E%tKAO9I5;hY)pXmN5-!YYKrl6`@(* zhYqy2i8&!p%$XiTB3ISrhciJ?E&!1gHeRRBZJ@P5Mg(9Lg-~6vROs?A0@Gz#Fv$ya z6T%7ZT2EW;8=uTmjn?^709(bv#TSL(sf-pc+a1!-c#?0{oGh?4D;U{MsvvzoAU>hJ zUP4^i;gr?bIda9~u+FoEe>I5n-rZwi?~N*G&5Hloj68>@UL3irk+W@>h@6eGN><{8 zYW4Vjqza0i3fZP6HS3F9xwtJ0*(&P5hzKaul#w2BHJo zK!-k0UE*!*U4gKt)W4;)u8d6jBQ%WewO~yvppQuPb-><5GDA+?ug#W znmoopx9~apA!83n?&)GvxEY_}+RMR^<4l~4zg*6`*|=Dpv7NEn`Jqub&EY*U_Zdaj z8634u2zOA^P_4>ZMYikMB_*i?(w4E*Uc1_>SR{-LlqFZfW*w};d&-(a%8^olaif%MewlSwAVn=Ue> zw-j#^UPazJS_V|{1ag%nVIWhA@~&qFU>7YlSv0aWiZ+hxAV0RVTG0}-%Noo=Wq4(^ z%5J{=T%fc$UC9Zaz1%YUy%~X=H;Qb<4w6S1W9GHSBr;xnBJaTsC*vu@t`BTa+GF^P z{1j)gc05M0{JAy2G*0f%51i)NLla~R(woxSy60UvNz93RBbeXu8&?h$rMOQgflV&I#y4)NdZHRwyFWv)?fLW+?7Tew~yTe#MD2@?ZoG!P-Cdbk1BEZmh07Fo&EP26Ymi5WZ+ov2dIi z-eno%pK}#aS*R*P*f^2`8vby1Wj{Y^di}ojx1<h3maX;LPg}Qsmr~V7rGa(^8R)92i@x!&|{-7n&C**TR zF{cYYKhwVlv12>se`oE^%nltfNZkUgRB%BwQ0pnH`M4EElxF#o3pO@*vdg3)bQHdJ z?oXbZJ_T+AS6OrqBJ-k1OCTvh?^4hG{2q>q^WRk=Y$VPm>j;ZtI4*a~D`Taj8NVYs z*dbxSoovhV6Kv;PET6{Eec94%B#})4l!7eCl|=jBz(f8i=bO8n@yN{y^|O=ANwFzz zEh*`WO$d%Is?3r}R5!5SlkB3UgUDejM_PTYsBhrCu>BW07VmM6vk#x@!9%ryv67S4 z&Nz}GFL!K7@E7Y-K1a}q>pvU=pY+Et&Z~DfkCD)-nlqkF!Rp*~nzI)!gie6rC?4Hv$aOARJgp)U@+@-h1 zLsZCPd+v0jSPzYcM{NO))T|NsAq0uedUIC`p9zg@rb|Vr{lF%`I-`Vfk~EO0j@Wch z!an@a0QsYCO9Q*_gPHPgs4=j`-d6o?Pg0POUvt4J0i`+hB&?wKG^@ZVpT`Y{#Twv3 zM_+8PQ^>~%1nBUowE#c-be0&YoksfcQ>X-(j2mw?UWKD7W`awm;Vj zI`^+E0I`g}aY*hsNGyHmqC(!27!M7Fac(vN19U2oy^iIR`5_6FV%JkTh_`6yd;jH; z;}TOUoF$FnH$7=X{{HU+JJ=H;X3yACq)Fz~PKJ^yzZ)x8gV5)`iCx&R-)gqbpke*Y zfF*l%^feXp_cpNahe{!cLyy_WvC+#0YAyJ(xeH{Q5d1wZshCEjk9+~zKuu;9m>l}@iZcyuAXp%QU@voi^=&a1=q zX~mH9RK*&I3tR{;?S9@!%h%-K z3?3w?`BCD``KzVgQjN$V7pkes?ePV!3YUzBesU%pOyCPWt;vQD#s3P3@oCvZ7L-wJ z6U@k^TFh7TmD&ssp1vsW1Mt{sa+rc;#W*jxuWRFPO9a6`z$>jM1I&jnD=1`~_pVkb zwmtU98@IySBl}qQ_sW3$X6TC3lk1f~axNE4l0!d*7V@DLz*}Q#+asNGf4SpBH$ETs zV#hFMqgis){15FkNJ`LK=af_G?~C+9s#ql?CpQ+F(~$Ez(n-xHZKnn z1UwP^`{3e9Y{rV2)0C5CFWL>tuxJk0;}Fe0xTOj1?+DlmdPy2{j?7%W?*o3hpEqJK zq<4%tYoJe)Uf~?R3K~U4XCGA*uWf`=BaX;XJu%`S|KSIp*;A&=_E&6`9BlQHglbp} zvsoThi>_)D#tJ1fpLEEHzYt?Zd~xRo54%I~Ixq`CmAgB>E@SP_n{;QzIG^LtKP|B;-h1o#g);->U>zdx_GB{YEPm|zPYK1%uixDh(q#X)X6>I2oEr5Q~B`34~<3f%|(|tDw4Ig=|s-_Q9pD=~QbEJEGcu{S8ym z@}wW~#kTze5vPt&3KG=GE6|TnwEAPsZ$@d`J}E4kKLd_Plb6#@hs5NiNn0l86br`p z`rUv64!r_b>FGp>l45)F&4m5}kISoG5OGT}+)h-C)OZ}2LGG}(POfnS69#xgixFX- z+`V<*=65H3dUUqbVb{pY8rYP~Z+YMBWe9TQ;}2ySP*k;+zrjQM0?a4)#9-Y$(4|D{ z9ldz&BbH*kS!C|WEOjoO$fc}=f;2-E!WxLRsyk{J$)JM#@_cLeJS61#wI*r!BQY~q z!A2Hi#y(k(zOq;TTe4Sg+)f7@f8`DSQH!d#(g}BT4qQER7K*R#nJ{rwxE;!JT4-ev zAe(|Po|6g{n*31-CP)&oMrL;GYO3D0FAi}DB93VAWvYet!kv%BBAaflP?qxyz3&<2Llb#5sxni_sdI^Pzdf` z6z^bRmSm}8eg^)U93tRB!9gdt&K+?zc}E%y2z0;SqpeiYp7qv?1-`r~=^&0D^GfNw z##+|XrRjZ>sK`jEAybbkw4~)XFx6C&+O~7peMSXQ`ARP^s=@Ve2uG6Al!L1Y#-I`@ zi&Vc2M*$L)J80>NWVdz(3V;18C#|V^c0NbS&+^Tk7gC1wVth$x%x`{zX8g%&Y`NsP zks)KqPF0@~kdTWJHlK}3ZTyBWd!)7su7>(vrQs)g`|JIpnri-Qg}2*x4g`iIIh@R& zH~Iq5?tG1DMIgv6xrrG#ALs`_B-$l!j9My0d|x}I>rt`?{JQ!noJlhZ!uRv!c~==h~@@qiYa3UZy6o=Bx3E0@sQ33LCWk zRkddxX_>*AA2)5qio(4)bFKX7RPcG=T39i93ow43XL7lftn!fRle9Y^h12t9@eUYgL~AW$G-}wHOjLaX{qk^2FlQqpC=J9W;tG&>=*kdZ zqyg7&l*kP06RXhv(eJ;b_}(_AmR(NyKAg`FJ#6(gi9@$Jp>j|ZNGU%UJ$Kf%_N5aS{ny84C%rjAh|5_R&ioiAJI&E1(2774>E|6Q{y?| 
zWJ=vbkNCwR6Fzi4C%llGcHS(bc6a=&^#uRLLLbh_y2mvrY~xu1rCEumlMkgUS2iLX zfkEr{nen4^nl)ek`5zH{|5n)lv52TlpinxA3X8%kG>+n(Hw3bOsZ3;Vn`b%k^w+}} z<90uYeupX#-PXotTp#M{s9p&Bmt~ZXuylV)hcO-is{(jf$MzD8RpY_Xlwn581UuCS z@ZUO@dw-J&C}qZ@=(TDSl>2$XVVXCB&`X%Wq_4U1&rh4*9URUMph z%#Qoz|EkAfage6?v2)2svnA4(f`U%AwQGc-X#kN1QVbE_A8cajP!_#BHOd$YPH|K- zw`cO*wG?8m9+X}|xs7^ttkug=X3pXGIkhmN59x{~_%(o#0m}o4xanovC`{xAfN<6U zUK5zp#*{nT@bu@lhsOITsp|k~r{3Jq%C?9hDKh2^4_XQdk}rfaBNc5oF&MgiQvdC3 zsa`TDToJ6yzu+Te4@G@#`{QaM{H@0+@(W&H)>Ar6r&J`}7dRzjUNL^9PbhAnPgn!& za~^(y!QoXJo&0D*Our55Iz%dd6TV4RvXMFAJ6h?TdQsfDMZTs1R;H;0I9Cy%F~>-W zzq9=)jhApB;W6se5rF)e;`{7_eIovubfN5$c&O&zWycqR89hpw)H=1)J3^HeWvC83 z+TjsS7dlhTv`6Hm4hMOOZ#6S~Y5-Es^~mWszp7Av-=g$TT&Y^F!?HJ*o(h;N(sZaX zY7|xr%CQ9%=!|tz`J$cx@+OeNu6(Q*!B_!Kg(Kdb`A7=wFE+eb#ZOCH=&U0IVT9_p zSiPZ`#rPapDQ~kvyQ|H!C!N1L>dOS6{+7?!;Jm@?!4YfUq7XF9vtU-v6H=vGViY{< zcI?hwaz8fHW;w#s~cTw3~0M8#c5 zM(DIR_$?NzJNuM1kJPZUnUGGF)_14#cMy6z)$fu}%k;R&6DFD`8L$d!dZ@Zoe3ZHa zehKw^OdF_tZzng(SSMV}G_8(+H(CGX%VyCnXfsl5{U<|BGZ z?;vu^18Ndl$of1W;=z(q`2LvylQppR6xsJarR@Fq$3Qnk`bX6Csg968~L+efkfm(g^ zX8EU5t?y3ef`DaQ@VQX=p<7IijNXSmA)2uVmMJ5=SMX%u%B3HJ0P$SA6;^E*@-g;K zRW3c=5!j>NVx!NQIPO94D1^~u))z7_qB&zFo5O8jW~2Xr9|$CO2aZRA-`uA!^mV;l zI6Xj)bfiF4t$FGkNDH7Mbci+s2ouX4zFnz}1fIJhe1D{$ETl9v{SS1R8UgSdD?gJ>?-!R*sLK-C zz|U;Yx@tl`slt#^!DzuQha#7+ncR5gm`t;UChnUBrh5`?u@8m@P7=*Qzy`akNlzyP zZ!#p2QBkqzqB38(XB z#N?JCA4I0}#aa7#mnYS0(EGiAu$S-MPgl^FK%{hjH`7?)BIR^4htAuDsY0ccDMbD< zp}uJs@?FVtfUrK`|C3<(Z0<66sQXD(X7!j^84n*5d+V-tBKgpF=V@0hDwY3^r7haU zKOaL!WqVRY!AOyEba*lf43sD}{Pk#eRJ7A6FxG9hlARXd@4pND;0z34fnm*PSCqy~ zeC*STL2tlP6tnk0Ngs7~Xyci&$_Pte+}RyOUM*!YqjG*~J4o1S(asZClEeb;a$ER+Z3yUvsQ!5VDSXG<)hi=sKHygk1lOq)2ibRiE(t{&G^hgkUDTDv|A?);@PqVlct% z36C5~nls{@U&a|8EL-bF;WUM2T9oIElv7MAz{*}A0Yu8}QW-kV#<+GAy>(LU7A~1w zm)Z(veIMP0g-+GvIPe<*N<`rrP;nMRu@B8bp@rfZ;*;nFq1Z~sK7X0}C|(QyIOxeU zSRpm)uSLlie;4cqPHo>Xg|g?Q>=-WXcG;>A>n%Dy0kY|8yXcC=?*XwyC^eUq$J52x zRaoWU3=B34EYSfUd_s95z8#wB#`}nsjT}oa)3J|B9IQJo=hw$qii=vn)O(iv1E&e3 zq;i8TE;;r`i%VPt^IF8B*~^)LXOk4plcVRl9PTd9+M6G+cdiXz#j$HaH&=p2c+XnV zJxdu*jNh+#;p9r={@oEBao?ycnyaU^a6`*|c$gVlsZDfKpuv9q(f2`W z<0#ZJ`hJmgQ@ra|YaloBh#YH2C>>LFJLDzOgc&OC_EUyL$0fQHeExvF&-`Kc9Q%h4 zBdl&d=d;_Ic4h@i6}d$tHG`MkeMhplrW%A{J#JGJ@V1BQE9qi-l5ch;Gn~M_JHpK< z6*JcCPxc0JtQKGg8n~ggQ42XWb(l!U?Lj-?FN}+#=(f(?r&DV0vJfkEGQik$n`qt? z&Exl?igdwa{~&uYi21^%T;WVmm%#p>+WrjGdTH3|VhOT z-%TQ1*!|bDs*+4^PJ5l^wRFNbsyklVtaD~&PwndNNQ5yDVp3QfH$8y{`gP6BB)V{L^zg&jOO4vvb;2vw>5Mh2 zop>+aXtFslcBU~yCJBq&2}EVgA^ZW|VFiT90WgRlFdqV8di(=_7V#spkD50tv@|bzvz88;W}Y6nx6ejh}bD?*N?;@6v=+!N~N=ucSIWqDwDSvTU={69ewL@u69z&k@Sg5DeD_L;^xEQu;@?~Jw#_v4t2sOo? z2{)&T*g6tQTY1<6JjdC)y3vGt<-p}|%}eYqw4Bm&8pa8FdfSUewp$WJzg8f)GN=N$ zCZaUmCHWZJFsY@+ZJk-yDbNpVdwZ}psoa?JioMs_EaO5fhPjPZpg5%KX*$zWva^ z{_xxHk<$hxjaItUZnF>QKxb7A8AFl%^csHwi_f9~2y(~fas+nbfxq2eVEVaMIrg;G zLj7L{EWU8+larDn2NC!mtiWH%gs}e!^1X?^aaGN8-P0W! zmRQcn4m)4dQ6i0IC!tM^NxNW(Ra6>L^{H@qE)R$$tbUrZ61l*$D72_%bDv}IooxiCpIEi&@ImmqXTI?N->ML2+rU zTW;Q7YC!Oh{mSc)v$<6QX z41SihyEXU=LzizJYbGG*eWA5$tg%`_KP(m=e8y`Tcc)6ZLmOY0cB^V8J-88|L(XQm z)-eLjx^dD{CKwaf5|X51n9Wvru!FwWjd;D1TF366BH*H!gcJ(ARKwLQt1X}E>uQpR zq$7UNvagEp&6bz$%XCRPO>1Jb5xwN69QKiry=!9HWK!G$L#=?`vi)7gB~lby(V_1? zUv6bzxZ#)y=IbZRR6Zl`rwU}dx1g&K49@}s>GdQ6)`{}wgi5@-S zwQBKE4h#^bi5Qk}URO^hngRApOc7LRtm~-28#19--1Z6ILIqc3(y{aif9878onNd! 
z_VIFJUF8yuUQ44BF3K$`omx^YX7TaZ|Arq>)DzN1(G^|(LGFucBg_XPxbV#*?THcr z#=i%>_E!%4diht2G8@A7MCmG$PC2bk->Is0r=KdE&kj~8PZ$hUT=NGDpIsV{zRd=C zijgd0NM5IT2@i!q7p5mGbh;`ZPrk-I!a|#30|3<5yLpyNWN~*q&#%nCI+!X+aFDXP zl%WxGmMsxGmDQ0xmCq6PcaE)xL}6vZ$`P4AL1M^Kmn*;+BhaL=h&kR!Y&OFc{w3&m zsWMwKSt)g4wl#-qcI9Bj?71n=x4ZbUYb++^oL#vkGCC@Y$lUIvKCh1Z%>10ey%s|t$>~+0QP($bJD~>$KhGxrAC9M;e{noD zPX?rx-xE+$!)0l?9$dwMWXFH6vAyTnu02uU^-TP^>I?)l8I~dySH6M~Rw z+P0AfJNOSD)q^^B6$KO`b>xTy0g~EFXDVA`<)9(+Td?;h5TJzb5;_Hivi?nZxfsh)!trpw% z$`X*fUjz=p#oKkYCXi&5QviFpeFFHW#Yk7J(FBvE2Pd|a28{BD`VWc&Y%kr!ozaGc zfqSy^Lf{J!??#7}g`e}uK&_m?V~=y@0ueZU@V!o)*Y7yY=D`kEJ2{7mg2vuUD?b`2 zB*b;2aR}nM@jSWi6S1t7anxmUz*(A9uP@wGr-t?>6ziof#b*I#p}^D2Xz`tdGht}f zGeZ20fl@6j?8OE#?Bjnh|7MMk4}Fv9s*;o8dQbp+vOQhVw{4SC!lKUCubIUmT`y~F z&U(0o`2mmVp_o!Xy_bLO$hY65g1$*{C$0Gy;ztqre8sKSy07-!qW*0HP{N@WSw~3e^S)R&ts+weygXWAWF5j;~<|f;&br~gX z{jQqKmUnWh9+_Fb!qY(SePtI5b=XNGjX8d*U8$HpR`kjid2YVTFZ2(4?XJRc&w1<} z1Xbc~e|chE<{A@TH%QuMlPTQ>q5E$buf*du@8ixhb%DN8Cc0+DTp8x;Ch`c{6{}er z?Ilx9#Tm*`HkoP{!k-okxu4hj)9V4d-|?s=SZB#Ntf%UElG!xBlE_TorhBu-DT~_3 zhQMdGO7w4)Sa`Dx&ml)Y1aj*Q3g%zL*0(zE`yRzF?Qw652xhg#L&XM$LX50=oU+p% zcW;Wsf7F#Oa#4VlzgT#Pi9|#`Qc3ukMpgaDq(7B-t{*|Gp~%Vm;MLr4VIn5dx57rk zrZQAnZfC!LP09`0gg)B^0q0*oIPASQtoF5vves(7$Xxg=VpF@`s1Z|A{Q`gUH&}Js z&#?ILG4xENin%rB>CQPSJ6p*2OzH8pA3Z8b9XfCox9N6~#zKXM__xrT`0(rEa-?mn zz0W5_Q+_RxHYTNp`syumGL<-EY&5UU@Q0&3q^{4yQbqxVE}Hi-K4Aj#eo>GayqlYy zkJg9Hl9RNm(Y5E)i}kI-C(OOARBw6nHS$!hhD~-%@?WN1RJ>*_?Aq6q@?ni*cKQWd z|3otE9{^fB8vV{Kw5kfxKF_~+(i&xb=Xti1_jc|C)eRN#7NdYD?-7UP|VPF;| zp=stallm+e0oK%vgRNg@H;x{io7^%~;p$&5`;jwOZTU)ScR&qXd>iA|9?h_jI#s4b z7IrMF{`6b3vF_)ozvMZRMaB63U4wUX(2*CMr{_yv`us@U$E-bGs8X5)U(YZb0TSq=Fjm4OObz3781Ppi*F~V$O9%)h=@hpS5)GIH{@wH<22X;*XbyoF1``?WjyY;8lc%efPC?LPOc)j;eK4{kL76RrV{B$*iM|j4KJg z?u#rdC)x%H3RM0(V9|Ww)_WHxx8#J%Nwq-C*84Aj=@WLJqv3ZPbFdeSR?8*osRH^0 z1$!K2C#@+XN#zP8VYTv%F-k5P)H`tVZg9OMgR@wx$)ZO1qF||(x#WR*7L4LNyPAb} zLue9!?RT_suxt|tQ&AVUuWI=K?>h06?0m{lI{eUl^t#$rQ_B4L-nd2nw&pHCerr#} z<}%>o?lXs`1Q!jb=)&TX4EzPM@ZYlshb-#m{E_(vOXWtcRJOpvw=v&vq>r9kdZZN_ zBbpXAKrL@p4p$EzLW{(xR=71yj!B7K%>;0hQa)eX)fmdQOp|ezG|-L+YI8xAU0XR| zBbAG#?}dwAE4iYZYDq*XfLpPIl~lt9G}<2EnuV!qWZlmET%6lz&=9 z2k*2(aH5K5#ZP`KdX|sJ;5ZBXM(3!(t{%{m(aF=TZdK2(Gn}Hptw3WF+QpP@oH>#0 zFWaO{l~QTwJaS%2v_RjXe&)he#7^=x@|EjVmf}rcoWZummB#>h-yb|DKVp?l&r@Bz zIGw2)@G@PJ=ZUs#tL8l-xqEDId4~rzdY30OySGt62x1-V6EGg09^~L|2*V|3&$e|v zkYTKN6F-(sjcIYRQ@=4}-F>h23o&`4ZXk-Bnt>ggVYx_|+HSS!RCwR29;pZj4=%?>lX;Sd2SvVMF&-}ShfJIG(#`Mm6=vfO~O6*hE^hdZRNv;dQYug zu^WeG*9VNdf-49>OK6x8C3+ZtZ98q2oS?0ES8tkn!ZM0p>13yH8Vr)V&DsVoh|U$n zm2nDNADc|6iNb}uI)7kf&suFTXQqSBp-ffR15Pqm8M_nm>;0onC29Z?>W`(3P6MgNIZR}TB9U+mB@CWCCH3$4NtFTDs~ z-nGVEC9jeSyE3hf@AmSE;nLR!w7bvirb;9@me*KD-X3xju@B)`%zS=}%d))D8?N-- z|8zyej$=V{SzCB5mL}@z*WNxd-#seyvN9`ht_7>(21Y7xTix!r-m@ zGv{1*Wkp4zsM=O1aOdh3Aq~SDw$(-Q>T8)%!UN2I7nlFx9U!D98{*&njcw9`;vWRT zt!C9EBDtg1MzZMV6^H>j{^K)}TY8agDQPW2|F$bY~ukOYvbFsW#a#0oN~1a9PhRe#m~Ff#05f2X8`nv;3_eJ2yOj z81LM|pf=KL4jsfXADi}GgA47U-&0P(Eq~wbq}(^vPj-7)ludS>Ue?B#tWNou$Upwx zI54DJ#zU$+Y zIwO~UZFBHEf+p3Y62G}U`m+LNiF>atlh(23)oy=n^#eO{^A`7k5qQPbqw~h(hvrP# zIyQjIqWX94o5hEBaDv66{vN@?vE5sfO=?1X&)b^7#XJ9JiS_7uDJ~SpCz@w(i05P3 z#Zz@9Qn%yN`j6&;aplvyy0KBu)vglgn3m)LPZ5FSVk_i2(Ulh%VfWu10-ah}i2_&m z-p%A@n|wkC$#~f}ign%nJ?q!Mm%Cj{88fCZI@!@Y-(;YkTiHYbXn~vB}IXCZv@MR|Dh= zsJ>5Qj7isbfUoz1k!_G~ZD1^E?Fztv#Bbcz-`2^RJ|nJ>XDaN5++ytbA|%TRZ^3Qx zlVk4%Y+tTAt3mrq-#6({(MV!byxyS58LFs}D+6k@>+>lA;}hHmV0$=S4oM4z zxW(l^jKC(7JuB-}{FC(D^FU3^D8FYa4i4k9l-te-s|G?TZ9Tnjh`rGL=1~2c+n|Ws zp((=C<8eMZGGc$e-s!&k_XP(JkH-bRXL3r4wNaZ}tpH%fhWq0i0K!p!=KOH;4Vvnr 
z-(a&fVy1xPsFHj^@*kTr|GKjOcd>Fp{zp9^n}=zHpacNWb%mf&2b@KYE5-ooW;qo? z3Fr~`+lBW#v!)-1ZUXYtIdl|+BU}jRN*hrlj_r>k*$wI{W(MOX>g-r74P23fIx!wLsa~(Sjk1D-+`UO|B_)DA90O!RnH_-h%dKI^S2M@<9N$ipyQ69S>kn6MbeR zCL0`k)Dz@f6|iggictv!HD(BJ(oHSx+K>K`1I9hS-Dy za|IIF%oS;+-JT~oWUdy2=@Qy!WC8rl>jDqQu+b-CcGsaQ{nfnlKp=H_p+%%}r@-n% z8Rt38&B1K7r(Q}~wuzhYVIS&}c5XW9)Zorit;NLh0s_otx7OAe;uJwhkDzWgEqD3B zXnvPhRk^FQfIx=-i?FYLYclNn-Udi1A*F1TNDEk$NUJDF$AFP4g2Wi3dw_^VNhuxD zU1Os|T1NMPA&f5R=AG|zJU`y|a~%5zwq56WeZQYv8!3n!Q|f+Hp1TbW%C+s!hUt>xRFIv zaXwU8BL;0^-c%C7p>+LOBV+3x3M*XvL7 zrc90SDFF$z<-077xK>dO+&z?G7HZWwDUH&e15xW(L-^8E*%-PibzV_f7~p^2_xAu{ zKisW!T$VPj9JqEKH2#jp;B#wn6@YtAA~4t*6oQ?|8&%gFbk4kAq@LqqmhbN{e{Rin zZd#o+X;_?Jo;8Thf_yrhNw#nO@V2H*Z9n|iug9ONN8(p3<~uyMR*_EERj((cZk_%5 zu~c_^|LE&lA6D>h^Q9Q@SDlYz+h-Q{<$j*E@v=;}jmZ)3z2$Ul>MQlQJ7uJj+DF^Z zK6c@fWo1jz0hj)iqbN5Lsa#3&T$)^7oF|`NQ>=EFxG#$!phRf7RlPPQtMo2;2ZDx* z(xa$P|I_RKf36pHU2Z`22Yu|-!!pb=+l>`}qc>+rP0%PRkj>v`v+920-<)DqnCq&pYc(L-dA%Av$7Cv8?qIf}xaOme!GkJAZF7l({&C`Q2f-dzh?t zO6XOqy;y{tw%21vpM{sOA_)kNv3bEqW+qEJ&xqZ}&xHAqE@j6qo?B}!TTC6&JtJJn zxb%hgV@^Q%Q0liksoL##;?xa^j1 zFDmP;?nzB$eyC$kLC|S(oFBD=-GR<`(-%056bg&Pf!D*gh!c@kH%IeS62LYJV#Qiv z2D#@Ay-BK?iaU9|m(QCvvhEi!Al5|L=C+%ENE*7OIqT9-KEZAZrK{KPeCrr!QgFp( zSQ{MtR?jjNLrL2zeyGyN#+1UE?DxBc+1U3Re2_({t8Y^ti6#p^3VUF1m;O@T+rz4_ zVb=8Ncg+@gF_^Jq-->*jpOmFy&3+VFk?NQ$<)mR@0lEm8yDtDbg{}R+Lyt47+L0MCYL+Uw;MYR@O%yQy;E@9gcU1HLAKBu-TJ1jU?%TW( z9DCwKHy>E>kvLC#%p%JDaf_4Hc@%Z(e+EJS-G}UYq-V1|hV*QXQNqnZ2NlaV(2d2U zsbyUaNYMgl*!ISr|BXe~!e7N{B!1gK!lNkd>P^M%r3|JB{)`{VFl`(!;6k!xDZTyt zY(4uaeL1m>VRdhP+(#!)L`5OMhi0iCM!)yP%&F$$$IftyhL$opj&Pye{nN4_| zUFL-Ih!R-6`3`AOww;ojc=1bEI;Zy&`WIcmiDUg5cui}k<=632Qv74j%|uU_bc@7a zzlN&`E2mBJoL{qEK5-`?idxsgZn1Z;Qth5(xHg* zA)fd<$$2FqmPRlc{;6s?sZiKX%@ba)zm8g*uPy$Nb-dF{Zb#xmV-HdrMXO$Btt7bX zh41Tkc0+o*bcxQgX*IA;prkf}ouT7#*cy|#>_vmn($Q{xitl2LUiT-}nVP4x1?#a( zBBtS0Q=467>4VYRh4_wBy3vW9EQ5ms``FQ!n**nuzpHehPddOfqAOKtzLqnLAvuqK zYR6I@suF4~8eKy)zH&S3O>!!9#D=9gm=l(Va#H5UutN&-XD1ZHjq?8ZOV+LE-?DG)d^T4Le!;O*~HIiG`Z z$Li@t6u3j^i=Xs7q7%5o|MI(~%7?4Y*g0j*>Z9EE|8dRxAMPtPOA!E1&FZ&C))|(K z8DaLo0TU76CUM^@gvHq7wYrn1?shimCr4e6WYatBxAeygf_ObPre80Q)plS_CuJ*ED5V?nt3+GkV1aTmofGJdsOA4pjF@JY;-8k+4np)hV zlCt@X?mE-(2J~;G1bL+nD<#Ns%;yf(X6+`{E(^6cO?m)A{UFhhm-1>utDJWHtH-8JA zjkKnx#;}k+G+z;%CPxRSYyIJC$G`8Du$9C_w0=YnYI&q2EcRu^7N=%B)$xk@LMLz| zb$;IMnKdvkGU(hiG<&$~8He-q#8QL~t9BF;8-A{N`QD#d37Z7H)$!*ZwsgjDyh9+s z(C#VCsplatBVsK#G7G>>U|p)AWRY;rdmFLG!*nO%|BlbSeRO-jv@=YcZc!n*>xhdOVOdTBa?0nP+?3)ZwMGt!;k`EAHlbq{vp12a4O<#rUN)j;|Thn#J61JO% z9VO{b%)e+m%)ofO8gmBOSFZhgThg0bWEK|l=PzQ?%l!mjpe7#lJ$_y5zES=nnty@? 
zYC4B3mT;QEhs2xKxaOVj#5cNPr#QV97!xHO6M9CjAm*zbNiXGtlFO44wD-PU?eSvb zMuL5D)DQyIGtvI|jMV41v67RDmard{9+z}o)}GhV*Z+xD-JsT#l9FxF`>$(Vlh;2y z>G2;^u~2~HnYDsdttY+hYn}%R3YW+zg8Ho1sIM31sh#NC`OeAWj{C$(lY9iyqE>zi zo*d_>4%MGh##*BtOUgLBO=V?zvR943GM^B3)`9`d5llhI4y_U62G%{=9bJwlGCIKT z1ZwTl{?zE?R|0Ny2zncsKRiE}164kJQ#Dz{4J&hEa5)x$4abacxrmE7cRHgA4n?tX|N zgQSag=(B2Op=q3c#7F0ABN(aPF!;%&N~9H=fyk5k$ZW=N^;HLpw8VoyOhidW!omFG z_*PL19vN!6PxBG@*V4^^?|UW2@-iC>{NMN;DkJJlawv+syVsJ?qOd1RM0FcWvP4RD zKykFNlpc}gxrO`#x*rS2x}-rq`%uU*`$Vf-G&VZ+Ps^3--O-33+pcGuVo9%@Rhds$ z1i%{^5vI^AdqJ?r&H3bHFZB7a(&?;MR?Ee<(I+?P@>3M7tZAg}5#E$(@7K8DyEAGF zV`IMO{0z~e0_>V4u1a^)7{W#O2nl!gSxSQqrNw@z>_wDjQzS^)Hs9OYnG~?BL1@O- zOJ?E|-qC5+^G;p4dh2$s>F1M|n5XSu?WA|3a> zzx0Us)lrcyiSfF9tv{yhb={AM;?#s_2<4MO)wKKbS_(3n7C#z?Zld6EeTLxwLEUpJI(0EiUL4 z0Hy+3DXvzxyQU;+<*a_) z5kAo!E3yG?Jc)H_nkGf5x|13YYLgs`OsXB(3lJX1AdR2sCO zNx8Hm@9^KYt$%az2hzzY;xP~l_B$JqkI2s#7R`;TefolxeS#A4Z|Ca?h!(mY$)n6< zfvCH>a6IT172?90YrNLxhkhy35l0&O$wC1_ZjxTUaT-|R9&_I!e2PJMAyW{HucL4L z39wpd%-Xmbw=!DAd;Bq@cmw*qR<8RdMO~9y1+4Vll0em{Nq@E17q5VL(aN)gBzKwVIzMz_(*}LxDD<7{54uTZ6 z9a1g(%z=!oTR?!Q5e6!TaUT{~V&1;3M?`*YyExDHh?tBi3ORo(+@rgI)4imAfl=Rh zmlNsr)NHUYxY%))zZVErux*IAa8W6-{gkb#^-g(8UI*(`uMZc$MC182dds({Fjdgh zKt@XPYto!Rie@rYIzUr4$07j?o@}s~FM~OD@VVH~kM{-Jr8?Ca!`V<5u1Fr0JfEan36Xz{*D zhL9h*9Z+3L@DjgsJaRAqj*TlRyXm2qy}z1Td{3=HY%s^B`eDw4$U!o}mU~D!07EYz zK56RNhH(Fl2M3A@3R>(5>VN$zV=2L7NrMmsHv*njzg>%@Jh~FmSIG#nd#!41Qq#0r z(+e82j|p`&u5ugx9-xlB9!eiera4cAKL8Vt!yX}YGKA9BzAs+eB|~({jjQ!X9-^cb z!LQY1F>HDU{D5gClU|vVo^!PyBob+-jEP2|w$ZS^Xf%wrW5-oPWM$HL=>goAjevKZ z7ru75cBTh9w$b;`p2VF;6vI69W!Wt3&4zRieWb!v*5agd6^aTj%`#HY7T2e$``-2@ z5L@K_D3e9{Ey>Q7k!%sV8x%8o?w@+5KeS%Uft8roI~;A#NxH4-F9l^@g}_T1PV$4_ z9nJCx*nX<8J`y{hd4atfr z)0WetJ=((Ml|Bx<+0jSE9ipNd76a60FCG_ywms6&NY%3j((ChuLI!FLQ zKxk9%Sf>wm1C z?f#~TSAcJgn#fjWjy@B9GIJ#KV>Aa^voAKdk93*)2ICdL-w$kd4UB$#lYiw_+S*6U zSsq|o@w8h*+H)@A`f;0yYfQ9;BDWUqJ6Vu%ZH%6n zDoz~oV4hs8_{aAR2&{=(0e2xGgs7KV=AeW(QK5!m;&D^zAl@;}p7PLu1g9eM+c#$U zkURAyr2#+ktCK6q5B8T6HZ8d}mAJuPy9gT~5yeoK!hn*^ZaDa+=ddvTl2RYwmRLP9 zek&FWo#Zg67uDBNedqD=xb8;iTj2^9g=YCM;6B&1NE%O(Oi96BJ`;)@4$+$_&aAa7 z`cetL|ETqfE!|#hTvGyjU={~bt7yt}9&c)EVaSS&Q`4d;r24kE`eP2SZ$?pOLW=7A zLFm0uC&%}XJ7I?0Uu0_77dZB2*cA*mw}|CQ=1RA=M|2N*^xd4waGvEJTd1*?Fyz}F zGMzV1lO$5(QgmS+_A{g1_YAlrA3Pb;FvRHZGy%n`MaBrLVIycY?4aTAe6(+Gn9i7u z5&bmV#!qXHFoS@VwcY(W;d%31rU`o?hWD-4j*N}@J>psfS=}@$bU?4n_CL1{gr@&g z(D(ssn4LMU@_iaxfBSpwQ$)g!tgBi@b7su0wzhL4N`&FAMSn&7W~eiC*`I|}63SZ| z5%y?cXe`X-Dx${6BCNIp{qF0bQ%MCKXh3Q@;kBO|eZ@U-Ho>|(@G3ND2m^E0iL>Jb zyrO1$^rDp0&n^q-Xe@T5ihW;8zB#X7Kt5?lhoobuiC-K?gaKzM@v^X3@$2JQ)tYR3 z-<_;?FeM!V=c%Txrg0v3NkcII^mprEazx^aKS_RCZ98~0n@@r-vD)8y;>A6V$4?G6 z?1yr+q;{(3-tRWT7T`XA+t?g0&W>?^pTW^(uRCZq-CQC!_Ev^ABh-&nzWsxnNEpCo z6xGkauy&_8QVc;7(`H^y%JsdAUnCQ&{L=e>=+)^w(=0#EX?1HB>qndV-_fUr)qjYm z0oUA<%d=NJTwW0cVFLvF(hlewx0EgXCieKu<@Zm@r`JMQA+o8D0E!MV zYf{}B@Zr>>ueIwT3h`5lic@J<*9t?{aT|IY#4(ZeV+B%>c5V#K#aGsan*ujRm2lVb zMqlEg6K79j(LP~GPwefAI<{TcB>1Q4s2A5Q#1WYczSE)9;|rZx84{-wyZH{pQv>(i zX{`HFD;5_vY7#l$d$@qmJNH44Oez6Oqfqr-q(zBTkYbTvDSa7;tgDwRXu-lik7zF4 z7`$~M++4}GD^e8P1QPpM&~7~;fn0WE(Ki+o+?|7UD;;`g5i6q#vZ48ItGA%hIJrCA z)v(C?Cv+t=N~n?mxeiWW?pf>}z}KAX;g?1&yYHU!`-_l8f=rVJU3C{t-|@EAE(9aQ zmiAo~bDUr4n0IhhTj7M3^vlN1nZhysDCVu+0tC~~tUcgGRDnZ%i=S9B%(`Aelc|Kw zw!g~HjbncM&l(Z!=N85>VQTOAS+^($@Nz{bFzg!0p1r3y*x)08I$h|{>AI~c4`l|o z!B*MLVzc*;@gmwaPjR_ukVfdDp>Uee_4EX+q|87>m%~) zNs%Ml(FAwyglaJcOneEEybW>#+%k>cG6#i%*-sry@K7*%bUbtmI{7=kr|4ON2Uu8x z|Lw$>({E=U&UmWdxKsmct_$p-TES{ztR>5oOO zr-2}Tw{k1&RH}+hb3;+Ez;?oC@cp8kG(MhN&Tabx*ihm@Le6pH)23Zix3E5@Pbnw{ 
zKgC5m33NI1H{~V7!S$SQXe(p_upkh=7kZ3r@FJe}6l^J!Br?zJYfMvqQ7A0e;4+xH z8K$D+Ttlp4T^~3VV{iOaL;3ks$Tf%CsDV54^49kL@s8kBvnddYcYdIdTgvh!-(jrV zByjEODJAhEIO{aZWmGMC$z05Z+cRUcK$lWVdLh+ik?eh;^ehqSlQ3+8Si4eBn_*LaDe&R zFTd%Rgx&AlGd%BVJ(3qok{o>_O#RHVgPo+;&JN8#bC=H^T?1v$y*!=h^It@kgh6Bm z+rQqN$2->h9J)7!?NW(&9c^1tPA&E(+PEy?sz|VmbPkNP_TsmUtgO;&S+&PwXsjgkjf6WI2VfNN-mAH-!Og)c@oa_VXZTYm##8Ogfa&Zu+sO;IB6{AscQB6 z1S3_s+N4#?6J4*yv?RQ9pp!309lvLcNOQnN9Z~Ol&H88~SU6Q}{?v@fcJ$=f|4aS3 zKK8w@9R;ZTq_dQl0G&dQ{Af8ZI9( zKpVjm4Y&lqPOl4t-HK3n4db}1gh73{ixQTVhUkxNnLBXRsOL-x&JC98~1yPW08(G6W6+TKV*jdHR2N(dPPi- z{rU7VlZMhWZ0EP%23V$B+}g0%+ag)V>OqQNMV&jc?dr?w0p1Z!nEyeLeQkzMw$_h&W{5KvxCc0{Bl4`Ts|GPcQqfmx80b(g= z$1UnXUJ2gLkvTgu()az<$G2^Ub>E%L1*uVT7i+`1qT0{P6xr+l=A+`rja6mP#mV0` zxaR~`ch`?u(7J%P({*-h>GI)v7BmKL6I!-eI%S4496xhFliwe}1fExz5-LI!J4UMC zz^XTCdIWMSmFRxaFVu+`s8o@$k&+?OU1R1row6)75bz~&Sd^J>~eNiY25W1P&GJcPwjS$Cs?IyhCr(1)DQ98 zz`l~{NXIs^OB2etDJro=yAETsvIMxkP3=0Z7Xs|&kGtTCHlD&+8&KJnfi&hK-`+i_Gg zPK(!P|={2gcSs~a0pnyZ>%7GQ?nUrCU!@R1_a zuZFTW8lM5XNV!D|%G$_F=ijnb&g~lC?>3Xzasq^eh&q2|bc5qD0~Z^!7rm^aCU553 z!zxHlN!g~*3f$#~D})awR{l{hLoICXBr1 z_e>|ukeBJb%SI`n*M}y@*I>hZ$UuIMKoz#wS|MR#L`G@*Ac9>nzB|W3vU};%PN2~H z0I~zq--~XyUP7#E%7vag869PB-2CL96h}*8xl;~-OaE+;7wy=@@TtyCFe7#PUyX*M zN@=Ee;&C0`k3F?g5oDF8066 z`hG=L470xz4f44(ek(PT?%j}RFIST#>sg^sfHx?Q%#9lhv*A@{aTNh4?*fVZNs4`4 z)`trf*6z<7`PyMge6N@U3;ZRQc&B&FvX?Sn>I`s+YT3coES)Ypym{QdKd0J7}meR;1&H zGPfOsz#|;PtF%9A?s$tv4z0)9kSh*ngRq_O$kWZhTHslGrd)?BDd>&pk;RGG$;uLY2yUKMI`WP%`IjZZ& zuK*z+!%;IXjY1X5O2xH5=LNA8veg3Qb`QmuL~avHQUzm&)TJ^%bL1TOK@aK0q&HP? zcHJH|?3T3E)2l2jQkkU&x2hRN+xf+p4pD)ztG(NEpY5Z;wBylV80&O3i|6hOtJ1UEJcr zz)|nAy00DndXR!2~ zx-1b~!ScSCYY=JeFPWcW&66j_Gk9}KpI;^Cje*cs5lt34?zQvLtuRqcN1fOv47(=q;-vfPlKw&&% z_z|l`KIJa_72LEjN4axHPiENmwA~SzONFaD4I+&_Wi6Wwg*BIpzf8SaiyqyCq@c}l!7OIj$WAbgq`MFy3+NdTQ*l8By@?UvFst^Gp<*6tU_ z7hhO#`$y5229~W7l+;2s`sYW%fE~?VrJdIODEpFB(|5^k7Lr4d4xNc-IPIp*tATUY z$P~jDrxytwhqJM3o2skx^a7|j5hBCcG!w{XFuMS$kTJWr=3)x-&+$QiYGC`WLdKH6 zmsm|}+nsKd0~!^qTzTCnSamI7S7o?EB*tlbb-;KtCF^Y2gSfs$)~NITlWF;zPv@)9 zMq~g0IcDZD5hJ9B=L+5SVIcu)*pUKgL5jp?b(pWiS<0LVtQww7>bZv~&U;+VfdIlD zJ7H%3fahX?z5prH*|T&~{g`SPdu(CEY)QNS=^yKm|AU#1PbJwg&G=uNb72_0X$jAA zpR=+iN;z$|sNSNa{c*>${5jUD^~zMOM+xoxQ!y|-o*jm8{n9KP+-Avo;H+OXc57D% zcqPdgUZ$INOFeyKXF>R;PSVN_>)gv%_hkM3R8*KmanFckh6mExd{Xhs2^-8w&&O`L zCz$d|+|P}?{xQASLMe|3L`-(9tN{~oo%cTIeMUs#7(Y&^bR^g-)nS{uE%(!n#1bWF z7(Zsq4qLQz`Ds{90a#~xL)3}A6GXILhX)yc;r?+7 zoX`KrVmEo0ntNsHE7?jUe4`ecWzOAM5{7eEJG!aKj>wGkmmx`pjq&Om&`XO?Zc&NvmVaJNyj?13 z97+5!8$Z#wb^eM_B-VpE&SK_tF~WkXPv~hIx_T}9ciq{Sc>i0Olu-Z z(VozUctdVWhmsYW9QO}jm6BAngy>c}|4Mr)n;{QSuxW(8bE3wWiRSJFH}+I-f_T0L zJDnaROMsplNUGwBJt*+}RJrGboa0eFnS~wlzR~6JQ&N5!%^dbA!<+I+Dg-RY3p9R< z`K?v@&S}@RF*6qly?(0Frvk%}F`Pr}keN)+XNuR~zgUnXR9xEMpmz?=HF2sa`*Z@) z6O~9BTB!oZw|*CvrqW_!*W_=A-F={0>XrdurmK8Sqk7a3eyK@U0P$=1yEfq-Qq0va zLK96@n!+nUpiLloY}sp1qfEi+_o-l1c)qKvsvv)L7H{WBQ?r1>2ntDuCl3vM5Bud6 ziJ?d@zuR}Kj~qj`nSY#~;YN?h-ad#q{VG4p+b{7l{q9YR)h|vl8x61by5;)(XcR4S zxAV6I-|)_HTwYuXBQzxy#~uMEZq`8KmO16HhhM9-aW&~d!>w+!UkA@S&tJ-cA9`>G zln)zHOQVOm7_aK(;-;K%WzCC#vk}f~xg3Ohk%G@IJG^x&eFGn8HTI5b>0>bJ%%g#xBrF)~& z;j~Apna8wS(4brqNl3cMPCxCwiGNp8xAPfAP@9gYIh-ud3bsC*sUyh-&?KSXVPd<} zG%2fUxtpgER`Is?y4!;kA(ApxlJcmNqJt}QTa}*_ewEmB$$EK>h%cr1&sLoO54HR+ z`I3=3|0U#aAe+A%^=KV7!^^h4t=~+7d`_+M@3>LjyrbOHPex!$? 
z@HwSB)d6F=Lm_Las=d{ZGjmLIM%%P)SzWrs(2?^h<<)hlv?A+we&hDf;s<#p~oIKDE_)b+yh~N!hKY}Njk?xG2%s4obMfZ$*1rU%qH5X z5O07ql=fK~kGFOT8N7lVPa0~r3Zq*^Nex+CAm;+qRQAS5caFYQiGrRLy{|%F!{yLp zwzqUkai0+|2xiXp*4{!gjSzJu5aUOVfZYIAvmmj4r?OG4VP$-#)6q1ihw8wJ1{z1H z#S=mLvG*b|B4R)W6obL)+E(iI?>?2oleGHRB3mgfUC#|B=rHG@#an|gu>|TL?y(Tr zz1V)=rhGPHwwH;eHAWdU<6DbLYzO2FJtdO@{)nKEn^9Xp6ue+cT23=H86-FI3PaZT znPCS2d`(=I;Dkm|kBJK8m1KsiuP(wtw*^bg0|{4mndGK}1;W_Gp6_gI82Hy;1Hq;7 zGmHqVZqtU9OfjnZYcRdmYT%Sv%tQs?&gDKakaviUh5wOe=i=D z{oWE3m?>joMELAD1Eu!o+o!HKX>3QyGb%HNf}4umK$y83ySRRtgc2_5m!i-Vsm%#4 z+Zp;HISRtdq49f9FsgyGv^LPmy0I80jd0x7*=-Pf;q$ya@-PLLb-6F)aVg-A#5}<+ z(>?Ed3Nja0fNrLZ=m(OziE!!1j3KIKJZ$V(q9kD%7g^iofe_U@YMWoA=|!|kgZJXjVe^+HLMi(}QQ^v}}Fo3~~FT{A)6 zq+Z%NA33444rMTNO`vbDtM1?mz@Kya?IzB; z(nMSA#k!W0f74*vspQBLasQ)x%OUK<2RAu+Kqb67>j+&gz=#W#J~CSdXY{ry^GVWXllv}$Bu zY4*Sp-i0WJ#&A832LHBuPSOzGxJ3ilH^itz5(}gZCw!0-Mf*(}`fFb`iqK^gC3 z5zYL+G$NjBVH2Y=z<;;ZX$T6IS9z;s+Oy@8-6n%>xjZ8AB+P{a{vJj_s;5O@rlS$0 zBFN6!XH>%xi-JppS_2w!!KF!sMUjrgrg+OJm*N-nINpxAAo`5Qp=PeBeoGx7ILLzz z;}x2i?QE)A`DS;Pt8VEi>U9XEQ7X3x?Pq5!*uM)1aPFz^ z&kL00x6P%Q7olKp9a@)rPY>0s@Bds74>(q5)7>FavhGbq9$Pb`4Sqg36qez$rKA*L z_NBRY|D(B34*mJ0JH~l34X@g0eExhAF!k(;;#Hj_VJ~LJfA4k;t3U4|wN@>O{k+{2 z-XASB?HWy+w;piXd2_7aDb-%vljgsIvY5huM#}r_{+ID*@8=%*omBP=kTQTtmT5i} zwerYL=Oo8FZ%-nlN!8Vx5OETx3+b9e+XRHUe2i-StQ}m)T*1)$M_%)z7}D8wFi5(o zQ-#+;Ri)%k*Rz7gfj20L!s@(B> zjCMW*o`uXKAbHt196O8`A+{zT;i8cpWJ}j0JWn$wS6R%rc)v_McHh*&Nnq`bxR)Lr zPuYgrS3|Tmh)DY}{t#-)t1LJ0BAS+^~OjWEOf_hzCh6hooD;V+{|d3GMKd2~9*zAw~a` z^v_SxcghXCpCt4W1K&!C^bI)h;m z-VMj^(aY<7WQ7@*Fme5HTCJEVRMd+5yx9a=#YB|hHYmuA#TToKI;yd+U`f!@e(7?p zW3C`5O;%E{V>a9*@iAY|=CNUMOXZ*Uvb58kZD=L08N?m8p(F)^jK_AAV4)EbLJ52amYxN+ z_h=%_OhkX&r{!^#PmL2K-6x9>OAjqFB-!8@-wfU86&?5j~P>jHJj1X zQswz3`Cw=!xA&ON)ph?C<0g@CKo|-5F92D5KaV ze`ZmDu>9L`nbq{H=LdBiB{$i${PoJk6d*pFcFSL@rZK2dvwj#oPj9obA+=e>{lP~%w z)E`|CsqavcG6&i%*iJS0)k5fr@Tlmgs+=pIBjOVi*O21`LGHbUHnE5K`MXYq3+C$w z#?~D@_20c4mUsM4)j6=#!d&SkvQqb=}Hg|>(8C^k`^OxMkE?OZm!x;)qVHDmij-1e=cD72) zWsK!C)kforp$tAI;A-_%%@>P_*`+?WWf$Bu>%7ytoSU9PFj1`=33(F-W`Ug=LD#qH zpzvDs*d$}fouvDGQ=3tllxfeNH;sU(UD$e71Sp3-ALcuJwd~rUUo;VyX`SrueF9fj z925>LJEkd7D=r^sEr&ReKk8)!kSfg6)}y#cxXSj570;%?&OxsMy!;2qWq!f;WRr9y zWki01@G`Hk6RDM1OU?qej~Xz~eD$ zv>!%)Zt zI35D4QZg0Bfa>)?pv0`u?OX_oH;FinCA!NE>f}M8fCg@Gq z+_UE|T4G7Z-^{X2n5=^<0a#6aY&0x0J<@jkWsw%paGS$bd?x4b#!s(iQ}YUC_}uZx zsCnd$Rd0nzNmse4k}H9G!&J2Y;MbPA<1U(=(l3-Y+(KkYSaUjuMR$-)3UFjbBlM~D zTIh@k5#fWE!RI@o@GiWev>582Dc6KPvDyT}itd!2$w%2UciiOR70^g{!^HoxR44P9 z@!{rlaytpebbOyeo;&x}XCX>8skO+Uz_431UZ8gs?}Xn9xb#_i&kM0`^gmB>5v9W) zX)GLWr(PbXZjd+}^MmR z-gVO*50q;0EK7Y#aA;#8!<5-hWIUx6<+Sj;A<9Bzh4meMx4kBz>b7$EWmZ4*jCx@I zQ@jg}-h|HIE~zER#cz5I!PNVZ{1eEzqKNA4>(~A~s1>~KchFg1;SrEL$B=Mt6*7HM zWm*eMqxK9L5@zA3YT8292|-6{+6>VC&Q)fa2){8u(J2!@Fd&NSOER(r;4||Lyy6D@EaKE%4vwLRqW~{0fOMv^3-%_0isjnH z8PrI3vyy{KJ-8rYnoed#vd6m0oc3sq!jIo}?&29RoHOX%LiQ~S_PG@RBPO8OAiPqR zV!@;`a#zRCdvoRfX>TUqs2ZxIn$TRuRI$G`@GV6sJNj>Xht!K4&SJXkA^`I@-+3D$Z2v-t zd%8v)2X=jgfqZqD+z$nCMVvgAYpKrI%1KDvJkV63p7JNm<1kY8g`=QeVnmOb>$-k2 zi^y%ha8VS0sr>+Dtfa*W(0}urdZczJO4Rb|)T4LLpO3aZaw@~M4tT^*jzEbFa0GV8 zrM~#?S8z=wD&p-8onOK)`nvkBwfp%w2yZ z(z^-LoOk+Nu7eD^w~4_>inz`zbJ(1jwB!skjrT^x@nD4UKaLRBwe~5bEl&Ac#LAdh;ID0jPBMUpC;s&x%CnqtGy0hf8 zH%sJO7Z!()4jJ}H@*`9VYRU&`1H${CG}1mCy@}R1BuKt1e~RmV92?Awu2P(YW5c0T zf(=5b#Jp_`{qn_DPMw|DVE( z&_o}SaN_{)(sxr+#?*e{4fx)x_hcRy)eN51?i(gD_Lr^o{`25jgq6T4Nx8})>;4iX zb*U;eAQ-yze=xaC#uOlgY(bm#X?P>PV{S1JtPGgyoOLftgn5p(Rhcw(#14$N{~<0- zWyGFWd^uf;TU1jxb=_>v_Sdyx7fHR9A?Azo$H0?{D0=%N%bmL&7(6Gq6{~d836k(U 
zG>j!BN*nHA`%+L5N6~bLw4YojUV#bU7FS7k{T(exq3C9lvbg)Mw5ekmT<&1xoPBNNRSnr&B=+}EnGDFo ztjcm0%ANmQ`wAOm6a3@0BM!`RfZJsm^?P1(&G6zL>mTDc0uMRFu)#qF9mCn+k}mgf zIRNPHY9F$cWJ{0giDJ~;_Y8~vgcKcy5%*>dQ{JiaQS+@$dudP_P=0BYP5Cq%>a-5T0e zRi)SD+>m5#PyR}h>Jc5o_wR+yf^pF_IVto3HM-#$^sI*CCLtU6pDvU%zS9Vs@O?d8 z4UUGd=A}%<#hxM75oYjO6+Jo`8}w~9=5=0{vR526X`MlNpHCn|BnvAh?^GN-%^Ouo z$OCI08X|w}>-&8bg8z&LJ&{g}qq-~BKaL(U3Qvr4Y^5+n1WCjX{qC!>oe5Bzbn(dL z?1$wkGR|GMu{W0&xl2~p<&^kG*>xt*zdAm8tx~xf2lcTyGt1*(dt*0d;buQG?Q0fp zEwYqJdzfo#vqg)}&KD1kfjVJYG{fWEV+|IYhG>8**7=!OWs$+n^aLid(>F%;eplc{ z(NpVv?6VRj?Ygwr~w@83uJG@WbuB8#5tZ zihr^|I^u%AmXwI5YsGhygMUi#@KO(aT8r_9-pMPEvs)T-B3X7-?wfuzSzNj&GsP-W zxN?>F^l{$ZsQH`~bLLz~-kF`R)RQKSnmTPEszkd>as(4<<`x01e4bRW{T)@ca)Prg z6bjzXte9m(FyR}PHX&f;V#$4{;KczKWEI@NUulo)?&E2G;41kJdSlolVzFHfxQ8#) zuhmF%&0Xbz&b{3TILW&|riHYnyG@ydnnG~oT(0dU@6{z`g0ljAVr8Ekj)_Vwj^9mQ zy0qoijbEzZ;Z(RddhK&@yhK7Wtyc>ci+A#;-KQL%aY{=1*B-SpDo&DQA9%0hx_3RT&bS4C#aOI^JEU8L7_iX9CwZ!c$M_9mZocEbGmR$5~p_bCO?PB8ttlE{c`g08@7t9+x92xonu@4kWiV z$_Pa5n{=eoEJxg}r3r)6NpF8J0aHvpzCr@EIq+K!v$QGOx8&Yu^0WddtNmZdhcSmQ z4=)CwVvgK(^A;cMLYC$GUSXdwqpS0fWGu0iPU^_C;Z@*Y>CVaXbUJTm=#>qHA$J;S z((Y8OMHO!r#Rm8c=@H(CZ3bw}bi>uHCqKk0`389Pvr=6Ne8U?F=NM|*guV#gZQolh z``9o0sKrQ#Ubf*HPZ;eg<<~GN738_b9he16|F%!?n5~`)G-j>qr5+<0KPi<;APr=Y zV9V!f87OX1#~UT;9G_}M+BsP(euDhYZl7sJc=$V$l?eg`0f~2b+Q^)qevj;eH0Sal zsraUU`M6w3T5@q{;hR}3L5#S7p_$;S*|gHGj1oDm{yR-uqUMX%?BH$dqu0W04DY_@ zy)zKD^Lo75HFyYuzn-bshA!2LcFJhK^x0Jj{jHuhr{f7SPiMpUS#gleXEu}}Dh!81RR2#FvThZWCVO_7 z&nLJNF43MsHeODh(Ga&ulDgyK896V?x-_NgAU6hYhGx4NJk#X8XG!D;zS@UvK$8cD%16L(XoCv+Ws;{fMli_ylm_i>Fo zwqT-AGCjvl+t%d18qYaR zOBGU@^+Oq7rbZ79u>B;)@Ww~gr9}YDA@}_9p~=m1Il+I0MI$I#6!uM=Igk|yv)#F^ z^q8ow40+y0BUM9?HHqkT-^taZLV1JvMZU8QU21ZrGc4x$6~o7?_{#BjaYAV97Bk-F zjzIq+$oYU}i{6E^4lsnkKg#eY`A??bL_dEi8vmT}SVc8;1n?$xReoPKtj6pQQC8Z{ zSZpp_{Dx9Ryv#xJ?sM(EZ4SmCu|qJDjcx3_Py=aH`GB>KfpYl6x)YNVHT>%}hVPF) zo?ph&ZIR^JqpCd5f9w7c=>( z7GkW}B|qsu;-Cs6Fwg>UarjH!BanodRM=ze54+~GjpXlvEyFc=;EnK3f3m66nuHoF_P%x!_ew~I*txe}UT6%^DMR1wf^0l*Q9>z$Z=acuq+2Jxq;Y%hv2q4Rv+>`Nx;k&nSsVl^ zj!Exg#_PkpyXc>I;&{(s8!;vjBp#zfBQ^J~vkv2Wxj2)r6CfB@)LB zL;(zf{e7LkptTN$7;*KMe%_$IB1*V5YhvdbC{S=CTbRO7pTrzKOGA%2H>u;DnO~1) zN}%*xYg#>dhkMMzjBjq^zN0Zl((h&CS zVE~@}GXWIA_~9LWCMCQ(R6`VD6ujq;6E_~KIHsui6&Qa)*n@`kmJZPWkPU=Pm6*PZ zgrmI0#9bKZ29gk+9|0b3VedabM|^@#+EKf{Cx3^2Om$eT4=nb$*JcB5W=t8p!_xRo zqj9h|VGTCq@YUz0K&|_w5q^0Sb&6)ci7Qrpi$dL$9`uKuK)!(PCt))YB#*-9u-M{r zh!#C3{^v-=ST^SZ-Vp7OmnfLP&%{3>pgF%qCK8ZN9AWYP1BkwxzY(Z?abBuG6fxc0 zDI*P3l*xn_Gr@dR1lSKKjr(ZSHidNdy%O@zVQHrTYWWL(t=})>*wMB7MV;(7ZYw>1 zH<>$^);49uK|&PGwIq4w^5Lap2I9ZJcDRTPwB~Nb3U7UXeU%dd)A^it{#{5#y*qI8 za!abHYkycW8f!V&g-gB<#oBI~99AkQ`hd|gG^6=LxDGe5FPl-F4T3^Gv~k%i+k&h< z8P|n;^~1Akz|Bf)o#Gg9$kkXGZKjN-hSwIf@O^2_;;;0?F)+Tirie2252yWKzFmK*=KO5DmwW8$_W3Px1kwv zUr=!3sV9>qohPV5%SGR49JF8dE{po58mW{E_$<2EK3_y3rk2Yh*TVS1C}M707sXWJ#%}t3{N_Q0aAD&T}Ui;_pr&c^eB{ zhh{|j@SW>!T$j)9HqE_FyxwA}wB6{AGPc9SCYP#x={M2dKiK*y=r~WEx54q&X!;_Q zcSEQ1r9R=F(n7?4IJHnzyt)Zn-Ob{-4THL_ED7C+j~-8hhg(+Br_w-mFex%8@gLen zr5Hz!`aTv;&`kB0{%gaw**t5iyn?>ldCe;!F)ex}SKm6f(e&vY;O!9|m-zVynVg8V13S1Ts{2#%L+7wgIhUj5R3x&cuhKm4)5Mu}x`3GYr z71V1dW_wkWhC0crXz~l4WJitT3juwu)nEhbpu4Qbf;^O}z#tk?Y70vXR}Ny6yNd8% z`rA5WL6tlRpRKjtT4dth6wx4u?~WDAeYvJnmQO3HKPnUg54w9}i13kto>=-y+U znu4b`>hh_ynFrz>n=t3&$mH|1fC+_Zu$`N68sB4YI zT-5b^+1$!jRrB9_uRC;5(N3$QGn)IXoqF^IO{zn50=rLYO6kthhIKIy&ku9$ToH@- zfRPNfe!O0Xj`MUct1OoJ<-TvtRys+}4;Ay8)UO+Ru>{^7_3}?g2gTC%M7#StZQA|X zTt=LbvC?y!?hI}!O zBXX%2PO)X|AaO)aeEr5SZ43?5!N`Djdv#*DaXYbRAi6V`@0eCQoVI;1RqBica|d>3r=T6|MMV#{m)v;s-sJwfetjNUODOQu~3~ z@tL7E3 
z7vBEE;=-W#!oB`(s7FU50;NQuDP7Uu+YcbY4XkS^GF9s58qv1=it zmI71#?uu6Ms9+FOy(p^k@sO{qj9zT_T47F!FgDRDe2ANvJUPyk#nhVQLmYiM9yL$n z_$ShNZ*v9CTB~+jBI~-K6?w;5_QAzw^#&fAzKM<> zbcg5>C%9ca9ktINcNOYRFDydxYNtR3lBylCAA1AUA_$zS+-2B>oKx+=QNN7&c%fV- zh4SB{1IQB@2UP!fW`6&EFwl$~`WtQ=P$t==rPQij=?k&3^(VRLjv+OnCP=nJwl0sP zeWw+WcUVWD`|5v3i|IEQ@`{6i-BtABW;efl=S5W~AuAhJ zLg8u+N9E@D!P!!?XmCizIp3YbJOR|*r{L>%5(kgh9CL|a$2p-S8<5(>8K;r9_PcC| zmHxLV9xU@$GKVD3H3w6?L!H{GD>j1ueOUVEqDVsDwJJ>DtaTA%aHc+AQ$gfU{@1nr zNr-xgKc zUNV#5{#Q6%Lq{`GQ@^R1owoZk6Z?9%c@k)NAT*h9<1og!;pK~V>~M%<`=3wOiT>dHj`()@^p?<1! z=t|HluRjkkx;JGqS9s+mUG%?i_rG{K1oPrxbR$k|OJt?d(G2M6-FK3!K2Hz!u_X7-C4vH4a>qGJ!Rw8Y@nb90VCPnoVwU@U-ZmKQK~HPLdK;1$1VM5@E;lQSU^bd;-o z?t3R822V883LXbN+x!1_lbMCWN_)g4r~u0zT)917Z@D&J zdTT&@d-@Oyy{*Q|u1J`<`aX5iSaJI&p>3?tdUk6}dJq^&lwX0e3I=#Lz{W`qiMQI zpQy!GwHJkx$;({@LP`!YQ&s&^?6IF2f5KP}=zT%GvwJI-mz=z*Wc5W4>7j)%Y|(VB zda8k0t^`g04MZ)?l#+Y0yVQc~CbTx_=JFnk5(lIgGqbhsjeu9@9zR$1m){z6HyVyh zb_Ew5&m^VFKZ#g6pIrc#w(d$UqSep8W{&AL94KpQei~6Z&aDBVMR%31F)nyr2hx(S z#LO0od9f>~NaRWX$MZ35gG?d}Q|#FOa?@)+Lv>0aRSBObE*&}&iRj8rOZs5){aJH> zYZPKx&kC`~Y*y;ck)Hl3J^hvSa_ZF*|K`xT`B?uyXW6r6S#uIZ%%)Zy)G6<{{ASQ$ zPPJ}v;mlE`WyWW${=NQ-E%CcezME#)Z4N*{pBkCbn1_S}D@G0Qe)&B#&f79&H?-^? zZWlLD&Mc7l51>=@y7cv9^$@5)A2c6jopldN}s+cVFXX(UP(9!FG?PNxRSRwkR=mfbuX{fFJ51-d6=&btlRLSfQ8FWx_ zYwLVGwWt+Rn%am2=zw?y1!wr*zOD8<-<@rBx84)e{caF3lqUsTJUcrxmqc2a{ndbY z(y#J_Fx~&lRP4V3@eeE}0-Tmt5~BHzsG&nR1H0vNSb8*OK56r_G%OT9Mw4|KX8M0t zT2D;Mn*A}zw~!VXRZLJ0yH@p*H*bPg^0g@r!`qGKX@2f^Jbn0%ShTQe-`Bfq9z+_a zH3+&U+h|a+QOXLBw^*drw|KOJ?r7>?8reJu!OV0Pr2mj7Sp0<3`6fI5`F($@M}84F z$j#oEQabE}&r5i;WQqIB;M2k9XF^uWnE*tpQ+bDS;>h_p=&okAy4Lc6h>*QOgIB_7 z)x_xzFI^F*Mcz?cctWg2tvP)(g-dcKdvbotAh%Xe5~MnD360}ZfX6hOt+aI#Y1oy| z$v5Rl_)W&sN*>}gBIPiqNU{kvq0jNMC~^=_^+a~mkeyAfR8d@1WQLw=NjM>dCX1x*c?_V#1l*o6>%=Q8V28@CJswoLe zg1^#wCzzQsp$~h{A!fs#(^>cX(+F?Vt_E>6VA!ToPyE-RY>h^q>4&@^?GIZgsx9rb z19jmqiK<-?8H?SN@3#!s%1-B~zV^k6GI0qPUIDK421`N1=`zL$z5d5EwZRa3QIh6bmZF!bHvqJYd|7HmevBb zA{~g2UY3iK6#Cx2@4YQDd_MqkfOnkabkXgI!_$Y$J^Np;@827A>3=&(kv6JnuJDkm zm9C5(-y3!`Z)2VNv3N(z8~mg?UZDmsM1jpcU%Vo1Q?I9MJ2}68gf#X~xBU^mv@@B5wMDxjqDDIg?TVNE;vWdfmZ@T2Q za>+}G)P>{MtVQh#`^Hxp{i^2ojRian$}_Lg+!iVrm)aV8&vcqUtG|TzPkY~8f?B2E z3SrG0CNL=8-l2UlaL38@5IfW=WEe&@?+f@d3S z)k*`WTnQZ*7Y=7C?ZU9}n~_bg(-Ul8ncvmM_1#EW*ydp}uI2^UWn~dy2E^&j%KPPp z_pd6e|9PBQ|2fW?65?}pt~!OLZ(JTsS@%rVR?CIzETi=r zZ0%U)OI=Clr@MBRXz*?4fm*i4TZ$^PD8dP<{1kUZ{x-fdEHenAy{{so#cN}4sM__C z`Y*x)=T&JonmvH^+$I(_YNh1!ljFF~BRK>>EgMsiCdVx5qy}@=qCvpz@#4-Ayxbn- z=^>l=bV`-UA}=nQvSJ(803!RTX)C~jHnpCl9= zm=@Dd{z+qkUOYm_NVg)_ysoZkJ4?-uTcE8&e|IFLpm-LQv4uk%H;vG{$Lq$f+O?Vv zsJ5o-#+FO!0n!a%?`*kCNz|3-lv#Geu`>Ogixa1<`DDP3Jc_q-Styo{T z_2#tQ>xQ)OQ-|gao=coi+po zPQ+AHR-AHTy$=fd9>6m+T6gJ7gugvB-lyD>kzgDvNceu$0Qr3WV=9XzvMnO@LshP8 zP*y7q7bfZ3eWonQfac-KwVv>ojmTM@2%`(~r+>tayWj!Zz&|osZ$5r0e*11lJoqut z3>qj3l!-%@hrN-6LLYvjkdJH3d3^Vge5k@uCfCF$0I!R7j`AO+Y0DDMxEhpfRL#2EucsdIx{%b;UtunlJJ_ z9HTbc<+7OVKI~o^u&mjU*4S%FbBdMFosyhHJo&NMNs~K_2X`6YJipx*1Jq~l)hfB4 z?_B*DGU)hF6UqtM--_CerhqxJtuq-VisI9bno7Kd6U3a}LzC*yuWp&nK2Q;T;G) zZ1${ga8Tl5xyTmxZJj2|cQb1!8!#N*f{}ZPu)G5RQhNXf;U&BXsR% zD&=;z6+KQubdLDzL)SX7}2EwS$Qf)1_I#_4g(f>@G3jHhF} zsSB)^ki(N1#QiYAM9?)q=iU?TZ|hlsL;FYN7H>=??2;WvEBR`!$Kszg)qe~#N;8NL{uzK#iMai^ zfX|Ju=mm4$2ifeSbItYiUrUM%v<2-_;J4}v~Zi2>i3aefe z=mN7v%|6iqO+U~D>VCLdN&1Mtn)3O(uH27n?Vyua+BXjyaCdo1X&ZVn%91tRuY&|5 zPh)Z4H@o`~VA3!{zu>YUqpyxQB-$FOOOCv|3n!08@l4CdcOSz6_IXaurp%&!*^AoJ z+p9X9Ez#aOUS%3Hm5yV5-^KegX8W*1R3u+A0Ag>wd}P z&AU1H5&?=vHRFsjwx~C?&U+Pec&YM`_*%H3&AY(X+;Whr&j^7lEi5Iq{QVzOVZJhp zczzsg;r9=_Urs=O^r%$D;JLh|j+x|X=Yt!F{vL%i9d`(FV+p!bMl79Yv928UF>KNe 
zJ#*U%Rnfo&Drvb_Ja!mkX}$AdCt!2WsB4F3++vT(-!hs?nzaQc!0RQ{{K@KSx;c;6 z!UrKr5V-0q6|8kmYAFom5$+n9z}(Wcyw6uu?pE<_I}5zB{=KkK_JY|`bIUMURUdt- z?GJ_g-ngwUlym5=vp4_`A51!KmSj2OXx@~+?Y15W&8r8JAqt0$NAcs7RCX>bwv80J zT94f#$J1PUNH=;wfSt{)Ih54#*A;=e(G91U)1Rt3=g$?ov~oDY2*P_!O>N$mkRUtX zL65uyF2s|(Wm?9)ZwO^bdbpDIGIB+`$FF_?=T3&1FM4RUiJ_%_yF8Y9HZkPn3)xz@ z)dW5u=_in>cYm+3bQ7zB!{Eiv>#o6igW+iBW#EC(VLH-lf(4UbeDFdIaC=dBa#%SV zWp2P@`rc+KD$In+LzCHJoM*5m+miiO*u-`$flWw=2Qqe;3%%PbJ3BCO+ck&Kh?sx! zBqN>A1+sWAX;2C+zdji#^3y)Edvfi1TKe(h$4}(M%igu)H2j}YgJL}OXSoPGz}HM1 zzNF9ax!RUF%ixuN2wUTlSJ{$(f6K9gQal*S{pH&p;9ZZEOglx<#0+ekh+BBAk8_@= zCyW)j1&-u{`tYz6#}d2GwfngGSOPGLv%(ky|3yvy-vL|hpMd=`C&O`}T9>OF0)ZgS zul8cC80Q>jDt4B|EQW)=HuJ=Xye$|ScqJTr4ExX)V|^NJWfmLiy?Onj{b}rlK6br? zEnP_XvSR3e1a79X*!5&29&Ww+0V0<-M5DT^QW4<&Jk+ zB>PpL_PsU7$P z4E;#k^4TPg2qCt~{xaOnu{(ogmEo2&O1G)5OT3Zs~0fE69;>lTQeL zCq0Gy?T6=UPK%?MS{rO%R3Cne6lv;Eac=c*oK_wlof^C!OT(T!%y^bwA)Wa(zm9~VvVl?7dWVSmJL`RMc9OHjc5je=RJ0uU#m;Y|#^=?PUw=ks zO;!;P_3mm|xjzZeLNfH*rbP<2rZcsVxHpoA&7&d-J|>~Ez6!%Z`Dpy&vTYAG2CR+= zzeuLu^;iM*yqYzg5&;{6y&H5^NkbEM)m4nuItw927MZ++m8Ns55H9qVG0jjWm$!o9 zrd3(6m&wbb-$eD+rm;U78B>T4gY!aE!^@^`C4aRMb1+Jfysk&(e23s*sSf^F>z+5t z?Ro9+`0eSLzNRiuc~ChPmGHEPRfb`%^c>!~clwPU}Iyj;K=|4J6yD0 zzO4qZn``O3e;^ri(DLLVolj$~h#?d^l|3AGhcQq;zp0U$F43wFvs23y13)n^Gy+~@ zb7|&s_*|D`AN*CQADb4gE`JWIeKDP1j}y^Tb=R0E-xe6}?~ULE$HAi!m6YGo6|sYE z)o@x)4_t!u6Oc5TNeTpYrWBlv><%SkEB4N~-qb3pfuPLA?&=N`>$AZVer zZC=+zgjXWZcFXUvLJYxCo$tJ)dX&JsvxsK?;qoZY>H({CaULnlwMa-c25x)01Z>f0 zcj43a0d_BmvL5_Gofq`2s@qvj#0Q#JS9A*UiDnxu;_S!3H3FZKVC5XUvrz-=lI?;< zrtPNIrf)L?AW7zJ74wEQ+^m|W^*bT(uV$0ZB7A@TN=ygZb2|cWif%E|A882|Sg(>i zo{0urIxRswOIL!Q9=wOSTZ~uQ0NgAreX}(djz}|}Mtte}--NExNUrBmeLroONEPBS zU0V_f=WmjJJWaU#g-zG$IGQQg7(o{_4@sW}_C=&P4zj4F&m+yxme-iCRs(7s*(gzx z=a+{{x5tl_uK#`OfSz7kj^Ll5@IBIT0>QyVbG(z2En1k zDiY8zdYTGoG|T17OnUU^I~59|+-I zPTln0oKN7CO4Bj{%u3{ZDHCxk>K(0LOl4HxmYIT4H%)wn#Eiua$ZBEGipo5_SsxbR zB~skhe*UHc-|$t6GBX(LFIPWmu5CZez&nyG#zJ*oznJ;=5ijrkGh@!RQn*JBsP?-Ij5Qx8hYwz=Oa_K zO_IK{9keVO<`L6>lPAgvja}(%=_UNytp)rX5u=+nvzVy*M#fG|F!Rm zEL%~NR%jJ0OyHafdEnLM7n43MuR1G&xJB_oEgh7$L}2qxLX^3l3GqK!jNQu%%5~my33szT)_;Fpp7Ri)YOjak9~8z*nHrQ>>{8`NI5sReb#bn6rTD#sk;<>|;+G{F)T@!VQC)9r-S=@Q4R{W? z^W2%NupG>@?y5ODPqx;6vOqsQPcyJcS^{ri>9})TvaAMgluEpccrpEUmwXX)j!R9* zD0g&$uIG$VgCb0nXNTBjTV>J$0b8G@q z2fwb1g*}_CvP-^rXEK@PFPMHz%}ArLd&ImtDFjQ!4F|wkMFi6ax{?DU(Ni@obTa)i$O+#)>(!g)W=ZYh-&c>Fy00Vg;?R@#VP?Guyun$Jp!=>qgxd;1Du)KPa%Pb^RN?-XF&Axo9?dJ_VgHI#Wm>rOfEeyX zTHhVtkPe=m{9Ma(@%be!L%cDqO+>v=rtGT=>Qt+o{l;aCA&WI!^Or7b1sBGL(`s?Q z_$QJ8iOY=S?t5%sGy-bGx2U^wNpH$JS-yOpinmfCQHf*zVt`w1u3YKaKTP8{N0+AG z?yXXmM3lYgLK$65!(QHWDK7^8@t)A8{g;0Im6#mAPhrk#t6rMv%y`mnX@%(Y{jCJ! 
zAAL|{7a%K!=3|Xx8-6d3&_bwlaeOO6^i5w|-R=@Nn(!~9$pzz2!&_EB0ZCa@>_qvP zaSY1`%LGT$2pWcq;&tK)CWY*QAp>}{6Wz}6LFse^BkQ@bv?D`)b3-$}OmCD#(8f0> zs^=3dXU3jwn8hUns!5)H-P+d^s}Sp*bkt+uuXaaaWy1ltq-IhTU;X>*Tk^Q9ox?Z8 z8<(gqfqU50n~0l=&Bmb6)VH=8;j(dS(CoK;tz^T74n z{wvX-`&Q43>ppTzk&E_6)>e2;yVG=;4o?@!na5iW*)d|5G*XwLG72}{7U1QA9gk*~ z{)P0@&3F!xCZ}jIvh-^2QhN`)FxWHIEn5BhjJOK9`@=()eY+d=^YD>olM|2b--kjB zj2cw5P`7q0`nj*H1vXhwwHel}xe9|yH$HG*r)X|cs2Nw4Q(J8SEgjbPP;bj0RWyOsGpu*H=H>TF84`5NVssA+7ym4362-AfGS+)H%blTgS;qBKzaj z{hO)2?c`*{7VjD2LF>%<R_R7KDQe`3?Wd0HG@C(Hw}^NCYl{$d-f#8lPR+&7(^XHI!%+m#NnI ztPQsqSq?Y;HD!0#&RCHQ&G7WG6ASA|eXF--B~iO;;<;P~P+D-G+V^TtlZ&R|5GrE7 zLEBUqs*g{^VQ^`_7?H~Jl)aG5zy0RoUpy)e;TTkstShvkz677!LLYfx zaXZ=g&$%#l3;G8H+G#zEYp&$6I3HI7cOMWw=Uh~R)753)liAtGGpLN}pnI~dGSlD# z(9z|Hdn6B`!-W}(sW46?1N!6<3dULL;7!f__0m)|Z8R504U!Crh0|U`qkA-{^fm{s zS6-0L!_n}WfVjQ27GaFPY$k_WtbIZ`Fs4T!!}hsU3SLDHq(RA-q@I)=YQoo%iV|4G zA5$Utz#aIK3Onpym zd&i3cE2Vd<@=td%kJ0N>uW}^7Khp~@b%Q5$0s$xyHkSudIO?luZVyxgPzpij(X@$s z_8U9pqq!TcV9SN+W)d$mhC=CxB2knDb^}Xv<$&sMJp{RpI*qFNBGcsw`C9U)C$#uflguSs@8#9^yT3{t`E_^Gn3bpGH?qw(n{yzJ#IUW z=cWwlS13c?W%oa>y<1^q5O+KIw%6c1l0gy5!sxWyExUxZ!A1H*-@>EPHyc=wdzJ7_ z^pmx{D+TiuV1+)}4@*?!x|M>K_WS-KnaUFb58d7yqjqCyld70K`p3VqiQ*>ZG-<)) zUzc8|&@bPDgCxhGK{*QQJTd*@S;F7}?>a-V7wXONKtE_>KbSo0 z^qw&3a$y8*nTM=*mXa4!;;&QsODG>{LBqUI=-ME0E0H3lAKS5zLR(rob1OMBYoS|~ zV9U>>6aU#5CwN5ra1D*SNz|DUbJ=vq?Uf}k_JN95xZU=+IQ!{3;i;}%uaw_T&iqKI z&thr~dx))D!+lASOLU79{8QtXz}rop#Ja-uPG8~nHz+)VfQ zC!#|ld8kdxn^U;Y|8CSZ$od3&SM!oiYMCZ_$9n}Z2YK{mfxdr%%kdxgCH~nu6Z|X$ z?Sb~j+SmJXF;3#qNjWLSQ`aL~ZV=%ivu3BL*Ile*z9n~UL7TnL$n%PW9&Tsb0FDDG zCFFnW#EP$37Y88R)Lji(= zTSX1fliT-@_Z;aNw98>T3rl)u&f9eY?S$4X0|FdLvrfG;%&_ByGUiV}ZTlAK)MK2M zONu}L+rikB3*bJDtzRXkQ zaYf$WHO&Q*WachZz#l2jcbYYMNb#)_*VJN*7ZOq4y6=g&ShcN0bXUK`E!b_<&q9;A zZfQB!h^~GS+vX8`^Sf6)qSC6p^FcAUQ}w7tF_FdM4gZ>#Tw7d(TMDj&r~@w{k?yFu zk_1!vO;M|^Q}IGsW68uPHRaa#6!-1#;=MNSO6f@FE$ROp$`}KVQ=A_5PkzF6vvrlR zZXk`(PKS3l>B{D%;f(h5oBA!v1`r=JoQZQ53fqEf`{LG5bEOL!i3(;uq~d3;w#%5- z*aH|FMp=O>6kU!oT+@zNH=z#lHt}#a!26N1lex5A)9t$Dz9@-nw6k4F<$+xGZrTt5 zXV^Xa?hGi8bP>+GXG#V0y8|Au>Q*yt_f#0fVpD5o_dGCe5fcjIC}a!}mXjO(2+zcX zu%wwuB7J&w;X*h=o@E$2nSU!yz6sGK>#7zQliADKZ~iR!d_l z0CbBezr2N_`vBGU$QGPteb9P!uLwD28%TR!*l3YCltdiF6o$zD%eirZvK%` zUf(?mO(Dn})*;de$n!twXVUs4`@6XH-tm{!S*pYmh78A5h3-&XC&vn*v_}{OW|acfMK-A>CQwWaU_ZC+FXMn z`S^#lg*~#R;sO&O_E0>Q#(o)zcufd4)7A*WLiK$i9O6vxgDsP+uD#6fc1?ulz=5&$ z2?yc@3wXE|aO_*;9gph+JNC9Y4?^c|33PV%`X!AxC5@NGY*z0jlZxR7aIE+^GqvnV zCz0*ANJ{n|AvnaAdDRjb)rikH5_=N{AHx2Z3aSXj2V zuI%_rL^jC!7p2#C{PkV@=jtC<{Uq0B>ntPU`(E;l&%1OeGhEd~KP$Var~G#6fb;a7 zj%-mACg??tD|{JGQd6PC49UInCoB+bO;771Nn)JclU1d)8Y7~ZTJb4aqR4GLYIVOHG zzu6aqXkKz@%yNY;++M)#EZV#_zH1*nmLzr~&c~&F0Avg_4}H8rAGIV-zcMY!w??T? zt6jYfg! 
zSK2&XEbs)R^Xb=Dl2mdyP+T}i6=_pJJBJO8Il>MJpB3^A^ku@?=%7Iy)kNi%HiI)z zCthP!K7F;M{B8KG&zdVlgIH5HXM0oXk7Vq?NnnO6v5w(#uo0`vF1;c>u!v0034HZ_ zxW&hEv}GwM(dA0&dno4%7Ai%vQao8qz6YAfM&a`##n|yJ1~2~)*=ds>chPF(teks) zv{M$3Xypzz=RI*PT@sOhW>Q4(x|d#J_Kq->eaQ`$T5_;1OG`2g-?#3489lykOP>dR zfy*{`CrZDkJV&%Ka0or-tHb5<6ii`O&4iVlOcSF~yCdUv23Wuq&ns|{2uXq?{<$zd zZBw6?URq*G=AD6+`&n37=w<-~r6?%TMaOb>vTP`x_4B{>ExH z=EwTjcj=dsOo^G+$q#j)nLXF3Bgfn_o|YPaxN;f}|2vOwA{OHZ%**h3(1A-njW5h% zGRhm&^*=Sp9jXfY;WO|9ig& z$pKyz8sH%<4)0iK6Rn5lhl zY%M&(SGk+EHW^bWi>&8Pq{%CIWBOvF#a)JPe3T^M*s`Ho(8a_@MuayM<8FHtP3c}E zkg+Fx{Zh{+YFD%9RCb_?BY4nrymBIO62V^U`-SmGk!eL(gO;NPM%|1>#qVXq6p-y^%?|FbafGb zOV363*y6m(LeLAGbaMC9gwg9quPEtYh3+bUzWjcfV{Z3&Yy>Hx_*CdBS1-ajb9+I*4OyiLi1~76Bs)fnuDu^bJmTm%4W}cfJ#wvROZGDJ0W_vlx!ew*2)~o)q`Lp%8*< z&%bn2D|aa#W!;XtqmE_^Z(xRHF;ro1)#wE%jDL#L8JB=C$kuu5!9HIQ=2jj|W=84ha%xI8V@g^I7y1ufu7MyB@m7b)1$q09>qPR(Ck6 z_xabXF!uj1BUsmVGo1o;*TUhPsj(dK>W@9)t(A`3(~h?_`nZkEVW3rh^3#&WRb7yy z%FNbOAC9nXQj!+N48x0JCBAMlGLQB5>LtD_DfJeL>$|<1pV0=B2Nbl7Z_c1O`wL*& zO4tG>>D}&&<5o@C=mBmJQ`eiK=xH5?=r0O9SCf3o0sK3<7nSONp)rN>TtBR1Wxndl zE-P2wrfFe4-j;E1V`&x{?VKIU7AM$0@b9L(Z=js!Y&UDTPytsO2HJwAu1Y=>xw9v1 zk_>|Lm$!$`UPiv?Gnj0Z*kC2n_#cd&WmH>H`{i3o zad(Ql7A@}X790W;D=qHs?k>fP6?Y3xf#6cy0u+bf?sU?b|9qSGUGpiQvR3Z8H|Oqi zp1ptjkO#u>y5}LVpdS56OM-7R{(@kRP?M^~*q>+7plL9SLUtzz%RGdC2Tt=w{^lph z(piIX*I*l={^pu@@SO1Pck{UNS{Ql-DKc6kQ2@XpYq1!p<`0d0$mON7Kp%?_0n>CMw zt&t)X5mL$C;l$JuIL+C^Pce{>G+1~NB(X;Cbw4a(O8lG%|C}>mFea|g zdz$A;^5Y&@#e=QmEgSV+H0&wRS5TNxR>Wka`j$2QY$e9o}S&pNXpc<0Ty3IvnK{J1G<4+!7Xst8zT#mz zj#NkzCOC+)SKMUf^A4Dj>q)raTruz^vOwHUmASx6HWq(QEX5)&H8JeyWh9 zDmH&)VHvwoMfg|_fi1-Qk!}Bw40jsd=4-h2gxszIQq;}Ek-DKPpUE!jci@z0CE1{q zYgpgtxFn-l+b)jcC8k+Gg}ar(ig)Hed%v5SAe_IH)w*vB+Om&kSU^?!&RJ+$f;nym z#Mj~;@o_77PcM95DxQwQ2SHSyX6lf#a7)MRP%G-Hg`!zvo0h1k21l#OXwy^8sWGyh zfC?|xCeiPS7jz}rs)FZm>H)#*a&1a;8rlAfL6HX|L>8X-UiAo49jVl-k!!AmPSvLp zC6rQ)$&hHWetxmTG-03ICH?KQiMpr+ao>V->`i*N-xyjHnhd)_B@-c7)?%UZ*6^H6 zaMJb{1g!mQwD!>SBBFEy&m$g`n5+{OvI@PS(Cr(^soY8EC--d@waJdp*C zh&)YUrDI_O*0hHL&jby?5ev-enR)~xWW;|*tNYB27eoyF_ZhcLRkYt-Ce{`a+p>ZNXCT@%-3O|z?VE(e4 zpizh(yi^6wy0|*Whd*_o3ic=pKboV9T5|p~J#%_k3tfwiz{Nr%XM@+x7&U5X&Kh#3p=Inl8}YBvPI6Uk8j1FR>55M1<S{YGRPd*)-c#-Ca!a*_Vgaw=i%Aztk#hwKnoiFH=$w z3$2JX;-Y0931lGDcYfD`hqF&x?>6~4bv_OrEjRijU9eO|5mgy_F_PjSh~gtfMv+VtAWeQc<|q{TNK%0GEz(KUK-Ajex?1||C<~ZL zCTZCa%UK09vWU9oOnR++G9s)GD@JaR%2{t=*tfEe%?8QVJLG^!Tayc?5^O?}QJ#Pvs(88q{^M zFZ^`(_blgzJZmde*^e?npn~sOteEl%xiQMDm=!&rUu|^I%w|AXU>PN{k0J zY*r7lE^+~fLWe|F?g&%GcLCXy2w(j5KIi|?`w%54^T4G}x%-FUYK&<>PfCVA>)uZ6 zwRU-dz%tV;cO>#`(MtR3Y+uw{$3MT=c=~xPZ3|_NyR=u^o}*betBycNGSpu(wo_-| z>Vu0AlAs39u_ zGpWpKCrTfFXjNcM8V#$*C@5&)sGv> z;5|B|!L`Kv6CwIG(UJG265Yl)jIAxlX(WzP%d#kZ3#|;F*9;0f7rsYoVK+G2fsm-5 zx#x0jzE|!CWTJ1T@2$)<&XtZ;ONp6`G9&|tYX1mO_8CO$#hG%z>iuj!%H%!D;B4+8 zvsBl{2?u0g{k`CZ1+ChL66aq;43~VvU4dlF&K7$RA`0@g-L_cy(46^_A`FJ$Py}VAnmuIF~5%gt_ zJBvTad!$c9ikNgl@K~n(8+mbXb-yS9_U@Bfx;0%wT3!fX@iYQU%opbTICoaddUFar z%FAV>vcl7!ym0O~F$GFVE-3|-ih5vBAvXR@KyJ}PH`&i=`Q-l42{sV?5Fbd7y zYO`Ny8hO1g72CXZ?G|}H@83gi)u}cRNLH54#IvHFz@yNyyggpto_r*cs$X_J8_(os z!^n~61DkhjhLhOt3@3pGPXZ)FV4KD)01984-R~bkBCq$`s>r1+sdGIQuLw?j{c`^~ z|MxcN3>hXP{nUC*j=sQ@PVCp{sTPL~Q6c~5`}d()kGseMU14nI`tI&6J|tZ*u}g%) z{^H)&IIj_b=IM=UA`MIAk`3|XKw=f_DO(e~m%GRZFSDIU7-v-KzgR#L(rZy@h(pul z9CpXfS&E7vyVbwSy0LW~nC7x`5U6zIDL`T<+eeOgxXvR>MBDA7Q1!pATB_4e?F`A1 z(C=Zfrq~tl|^8ZajNFE{AwWsq#CAzh?qRl2y$~L_hG~*;3L*GC&~04L^aTVv%t9>)wQYC%Z5z zF}{5W6{Ob@vu0Pvj*%K68>tnpV{_q?4}3-<8E5!db2*-t0f93~+q(t{#THB_5dmM@ z57D{GW6}z=yUQ6^PAO4TypMaIp0L@7RwY35lGU_0eNzr@aSx+sDQFMWbYG;pRUbl$ 