From e2b11009e60b53277a997561369a3a060b2b20a8 Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Wed, 18 Jun 2025 22:32:20 -0700 Subject: [PATCH 1/4] Cherry-picked commit with merge conflict --- .ci/caffe2/test.sh | 6 +++--- .ci/docker/requirements-ci.txt | 2 +- .ci/onnx/test.sh | 2 +- .ci/pytorch/common_utils.sh | 39 ++++++++++++++++++++++++++++------ .ci/pytorch/macos-test.sh | 2 -- .ci/pytorch/test.sh | 10 ++++----- 6 files changed, 42 insertions(+), 19 deletions(-) diff --git a/.ci/caffe2/test.sh b/.ci/caffe2/test.sh index f224608729320..a8adfc1fa0c74 100755 --- a/.ci/caffe2/test.sh +++ b/.ci/caffe2/test.sh @@ -5,7 +5,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh" if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then pip install click mock tabulate networkx==2.0 - pip -q install --user "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx" + pip -q install "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx" fi # Skip tests in environments where they are not built/applicable @@ -151,8 +151,8 @@ export DNNL_MAX_CPU_ISA=AVX2 if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then # TODO(sdym@meta.com) remove this when the linked issue resolved. # py is temporary until https://github.com/Teemu/pytest-sugar/issues/241 is fixed - pip install --user py==1.11.0 - pip install --user pytest-sugar + pip install py==1.11.0 + pip install pytest-sugar # NB: Warnings are disabled because they make it harder to see what # the actual erroring test is "$PYTHON" \ diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index 89960035d918a..38d0bf3776ed9 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -261,7 +261,7 @@ tb-nightly==2.13.0a20230426 #Pinned versions: #test that import: -tlparse==0.3.7 +tlparse==0.3.30 #Description: parse logs produced by torch.compile #Pinned versions: #test that import: dynamo/test_structured_trace.py diff --git a/.ci/onnx/test.sh b/.ci/onnx/test.sh index a7d3b72c62a7e..d42ca2c218dec 100755 --- a/.ci/onnx/test.sh +++ b/.ci/onnx/test.sh @@ -19,7 +19,7 @@ git config --global --add safe.directory /var/lib/jenkins/workspace if [[ "$BUILD_ENVIRONMENT" == *onnx* ]]; then # TODO: This can be removed later once vision is also part of the Docker image - pip install -q --user --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)" + pip install -q --no-use-pep517 "git+https://github.com/pytorch/vision.git@$(cat .github/ci_commit_pins/vision.txt)" # JIT C++ extensions require ninja, so put it into PATH. export PATH="/var/lib/jenkins/.local/bin:$PATH" # NB: ONNX test is fast (~15m) so it's ok to retry it few more times to avoid any flaky issue, we diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh index 7a5c39e36c150..13dcc84cbb547 100644 --- a/.ci/pytorch/common_utils.sh +++ b/.ci/pytorch/common_utils.sh @@ -127,9 +127,9 @@ function install_torchaudio() { if [[ "$1" == "cuda" ]]; then # TODO: This is better to be passed as a parameter from _linux-test workflow # so that it can be consistent with what is set in build - TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${commit}" + TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install --no-use-pep517 "git+https://github.com/pytorch/audio.git@${commit}" else - pip_install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${commit}" + pip_install --no-use-pep517 "git+https://github.com/pytorch/audio.git@${commit}" fi } @@ -139,8 +139,8 @@ function install_torchtext() { local text_commit data_commit=$(get_pinned_commit data) text_commit=$(get_pinned_commit text) - pip_install --no-use-pep517 --user "git+https://github.com/pytorch/data.git@${data_commit}" - pip_install --no-use-pep517 --user "git+https://github.com/pytorch/text.git@${text_commit}" + pip_install --no-use-pep517 "git+https://github.com/pytorch/data.git@${data_commit}" + pip_install --no-use-pep517 "git+https://github.com/pytorch/text.git@${text_commit}" } function install_torchvision() { @@ -153,16 +153,19 @@ function install_torchvision() { echo 'char* dlerror(void) { return "";}'|gcc -fpic -shared -o "${HOME}/dlerror.so" -x c - LD_PRELOAD=${orig_preload}:${HOME}/dlerror.so fi - pip_install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${commit}" + pip_install --no-use-pep517 "git+https://github.com/pytorch/vision.git@${commit}" if [ -n "${LD_PRELOAD}" ]; then LD_PRELOAD=${orig_preload} fi } +<<<<<<< HEAD function install_tlparse() { pip_install --user "tlparse==0.3.25" PATH="$(python -m site --user-base)/bin:$PATH" } +======= +>>>>>>> 0bd4030892 ([release/2.7] Removing --user flag from all pip install commands (#2238)) function install_torchrec_and_fbgemm() { local torchrec_commit @@ -173,6 +176,7 @@ function install_torchrec_and_fbgemm() { pip_uninstall fbgemm-gpu-nightly pip_install setuptools-git-versioning scikit-build pyre-extensions +<<<<<<< HEAD # TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it # seems to be an sccache-related issue if [[ "$IS_A100_RUNNER" == "1" ]]; then @@ -187,6 +191,29 @@ function install_torchrec_and_fbgemm() { if [[ "$IS_A100_RUNNER" == "1" ]]; then export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache sudo mv /opt/cache/bin-backup /opt/cache/bin +======= + if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then + # install torchrec first because it installs fbgemm nightly on top of rocm fbgemm + pip_install --no-use-pep517 "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" + pip_uninstall fbgemm-gpu-nightly + + pip_install tabulate # needed for newer fbgemm + pip_install patchelf # needed for rocm fbgemm + git clone --recursive https://github.com/pytorch/fbgemm + pushd fbgemm/fbgemm_gpu + git checkout "${fbgemm_commit}" + python setup.py install \ + --package_variant=rocm \ + -DHIP_ROOT_DIR="${ROCM_PATH}" \ + -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \ + -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA" + popd + rm -rf fbgemm + else + # See https://github.com/pytorch/pytorch/issues/106971 + CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu" + pip_install --no-use-pep517 "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" +>>>>>>> 0bd4030892 ([release/2.7] Removing --user flag from all pip install commands (#2238)) fi } @@ -224,7 +251,7 @@ function checkout_install_torchbench() { function install_torchao() { local commit commit=$(get_pinned_commit torchao) - pip_install --no-use-pep517 --user "git+https://github.com/pytorch/ao.git@${commit}" + pip_install --no-use-pep517 "git+https://github.com/pytorch/ao.git@${commit}" } function print_sccache_stats() { diff --git a/.ci/pytorch/macos-test.sh b/.ci/pytorch/macos-test.sh index 95aad6e29b7d0..c2b1d98ae7fd8 100755 --- a/.ci/pytorch/macos-test.sh +++ b/.ci/pytorch/macos-test.sh @@ -274,8 +274,6 @@ test_timm_perf() { echo "timm benchmark on mps device completed" } -install_tlparse - if [[ $TEST_CONFIG == *"perf_all"* ]]; then test_torchbench_perf test_hf_perf diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh index f90344ba43052..f7080f14b110e 100755 --- a/.ci/pytorch/test.sh +++ b/.ci/pytorch/test.sh @@ -182,7 +182,7 @@ fi if [[ "$BUILD_ENVIRONMENT" != *-bazel-* ]] ; then # JIT C++ extensions require ninja. - pip_install --user "ninja==1.10.2" + pip_install "ninja==1.10.2" # ninja is installed in $HOME/.local/bin, e.g., /var/lib/jenkins/.local/bin for CI user jenkins # but this script should be runnable by any user, including root export PATH="$HOME/.local/bin:$PATH" @@ -193,8 +193,6 @@ if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then export VALGRIND=OFF fi -install_tlparse - # DANGER WILL ROBINSON. The LD_PRELOAD here could cause you problems # if you're not careful. Check this if you made some changes and the # ASAN test is not working @@ -424,7 +422,7 @@ DYNAMO_BENCHMARK_FLAGS=() pr_time_benchmarks() { - pip_install --user "fbscribelogger" + pip_install "fbscribelogger" TEST_REPORTS_DIR=$(pwd)/test/test-reports mkdir -p "$TEST_REPORTS_DIR" @@ -1312,8 +1310,8 @@ test_bazel() { test_benchmarks() { if [[ "$BUILD_ENVIRONMENT" == *cuda* && $TEST_CONFIG != *nogpu* ]]; then - pip_install --user "pytest-benchmark==3.2.3" - pip_install --user "requests" + pip_install "pytest-benchmark==3.2.3" + pip_install "requests" BENCHMARK_DATA="benchmarks/.data" mkdir -p ${BENCHMARK_DATA} pytest benchmarks/fastrnns/test_bench.py --benchmark-sort=Name --benchmark-json=${BENCHMARK_DATA}/fastrnns_default.json --fuser=default --executor=default From 761c92313045cd2b63c346c5e07b70392a97222d Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Thu, 19 Jun 2025 08:34:44 -0700 Subject: [PATCH 2/4] Merge conflict and move to requirements-ci --- .ci/pytorch/common_utils.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh index 13dcc84cbb547..7abc5dc8362e9 100644 --- a/.ci/pytorch/common_utils.sh +++ b/.ci/pytorch/common_utils.sh @@ -159,14 +159,6 @@ function install_torchvision() { fi } -<<<<<<< HEAD -function install_tlparse() { - pip_install --user "tlparse==0.3.25" - PATH="$(python -m site --user-base)/bin:$PATH" -} -======= ->>>>>>> 0bd4030892 ([release/2.7] Removing --user flag from all pip install commands (#2238)) - function install_torchrec_and_fbgemm() { local torchrec_commit torchrec_commit=$(get_pinned_commit torchrec) From 9ebadcead6ab62f13a12d9d980997e85c6b70514 Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Thu, 19 Jun 2025 08:35:42 -0700 Subject: [PATCH 3/4] Fix merge --- .ci/pytorch/common_utils.sh | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh index 7abc5dc8362e9..5a4b141fdd129 100644 --- a/.ci/pytorch/common_utils.sh +++ b/.ci/pytorch/common_utils.sh @@ -168,7 +168,6 @@ function install_torchrec_and_fbgemm() { pip_uninstall fbgemm-gpu-nightly pip_install setuptools-git-versioning scikit-build pyre-extensions -<<<<<<< HEAD # TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it # seems to be an sccache-related issue if [[ "$IS_A100_RUNNER" == "1" ]]; then @@ -183,29 +182,6 @@ function install_torchrec_and_fbgemm() { if [[ "$IS_A100_RUNNER" == "1" ]]; then export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache sudo mv /opt/cache/bin-backup /opt/cache/bin -======= - if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then - # install torchrec first because it installs fbgemm nightly on top of rocm fbgemm - pip_install --no-use-pep517 "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" - pip_uninstall fbgemm-gpu-nightly - - pip_install tabulate # needed for newer fbgemm - pip_install patchelf # needed for rocm fbgemm - git clone --recursive https://github.com/pytorch/fbgemm - pushd fbgemm/fbgemm_gpu - git checkout "${fbgemm_commit}" - python setup.py install \ - --package_variant=rocm \ - -DHIP_ROOT_DIR="${ROCM_PATH}" \ - -DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \ - -DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA" - popd - rm -rf fbgemm - else - # See https://github.com/pytorch/pytorch/issues/106971 - CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu" - pip_install --no-use-pep517 "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" ->>>>>>> 0bd4030892 ([release/2.7] Removing --user flag from all pip install commands (#2238)) fi } From 6fc9673c512c5f85e5acc84d05c2d5fbfd071a5b Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Thu, 19 Jun 2025 08:36:46 -0700 Subject: [PATCH 4/4] Update tlparse to mirror upstream --- .ci/docker/requirements-ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index 38d0bf3776ed9..1b5253db5d95f 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -261,7 +261,7 @@ tb-nightly==2.13.0a20230426 #Pinned versions: #test that import: -tlparse==0.3.30 +tlparse==0.3.25 #Description: parse logs produced by torch.compile #Pinned versions: #test that import: dynamo/test_structured_trace.py