From 18b2961d52d82d7c556d16ef358766757570305b Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 13:36:22 +0000 Subject: [PATCH 01/10] Add missing export that prevented any pyarrow tests from running --- ci/scripts/python_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 20ca3300c0538..7b803518494ee 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -52,6 +52,7 @@ fi : ${PYARROW_TEST_S3:=${ARROW_S3:-ON}} export PYARROW_TEST_ACERO +export PYARROW_TEST_AZURE export PYARROW_TEST_CUDA export PYARROW_TEST_DATASET export PYARROW_TEST_FLIGHT From d944abfb4f585ee71912fcd4e22f6b7c92a55167 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 13:36:49 +0000 Subject: [PATCH 02/10] Enable pyarrow tests on sdist_test --- ci/scripts/python_sdist_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index d3c6f0e6ade89..1cd1000aa3903 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -28,6 +28,7 @@ export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON} +export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} export PYARROW_WITH_S3=${ARROW_S3:-OFF} export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} From 9f0cd0b0345f430b684677f453c727e65ddbbd51 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 13:37:11 +0000 Subject: [PATCH 03/10] Enable Azure on python macos builds --- .github/workflows/python.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 6e3797b29c21e..25d918bcc25aa 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -132,6 +132,7 @@ jobs: timeout-minutes: 60 env: ARROW_HOME: /usr/local + ARROW_AZURE: ON ARROW_DATASET: ON ARROW_FLIGHT: ON ARROW_GANDIVA: ON From b699483dbcfc4ca3f60c916a450526457c77e740 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 13:37:43 +0000 Subject: [PATCH 04/10] Enable azure in conda builds and install dependencies --- ci/conda_env_cpp.txt | 5 +++++ ci/docker/conda-cpp.dockerfile | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index ef00f7cf4751c..753950e796f0f 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -16,6 +16,11 @@ # under the License. aws-sdk-cpp=1.11.68 +azure-core-cpp>=1.10.3 +azure-identity-cpp>=1.6.0 +azure-storage-blobs-cpp>=12.10.0 +azure-storage-common-cpp>=12.5.0 +azure-storage-files-datalake-cpp>=12.9.0 benchmark>=1.6.0 boost-cpp>=1.68.0 brotli diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 7a54dcc86f8fa..5fa7a82fe3851 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -22,6 +22,16 @@ FROM ${repo}:${arch}-conda COPY ci/scripts/install_minio.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_minio.sh latest /opt/conda +# Azurite requires npm +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update -y -q && \ + apt-get install -y -q npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + # Unless overridden use Python 3.10 # Google GCS fails building with Python 3.11 at the moment. ARG python=3.10 @@ -50,6 +60,7 @@ COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin ENV ARROW_ACERO=ON \ + ARROW_AZURE=ON \ ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ ARROW_DEPENDENCY_SOURCE=CONDA \ From b8f6cb5b1a3dc39247491b38decde99fd22cb19c Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 22:58:52 +0000 Subject: [PATCH 05/10] Install node from conda instead of apt --- ci/conda_env_cpp.txt | 1 + ci/docker/conda-cpp.dockerfile | 16 ++++++---------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 753950e796f0f..b8c792008a958 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -39,6 +39,7 @@ libutf8proc lz4-c make ninja +nodejs orc pkg-config python diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 5fa7a82fe3851..bb17871e01613 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -22,16 +22,6 @@ FROM ${repo}:${arch}-conda COPY ci/scripts/install_minio.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_minio.sh latest /opt/conda -# Azurite requires npm -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update -y -q && \ - apt-get install -y -q npm \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_azurite.sh - # Unless overridden use Python 3.10 # Google GCS fails building with Python 3.11 at the moment. ARG python=3.10 @@ -52,6 +42,12 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all +# Ensure nvm and node are on path. npm is required to install azurite. +ENV PATH=/opt/conda/envs/arrow/bin:$PATH + +COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + # We want to install the GCS testbench using the same Python binary that the Conda code will use. COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_gcs_testbench.sh default From 40f1db1b1f79076e646e804c3d02f48ff6cc82de Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 15 Feb 2024 08:56:48 +0000 Subject: [PATCH 06/10] Update comment --- ci/docker/conda-cpp.dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index bb17871e01613..2b96dc5ea1767 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,7 +42,8 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all -# Ensure nvm and node are on path. npm is required to install azurite. +# Ensure nvm, node and azurite are on path. npm and node are required to install azurite, which will then need to +# be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ From e740c3a0ffbb4122c2d88928aa41ccfb40dd97e2 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sat, 17 Feb 2024 16:46:24 +0000 Subject: [PATCH 07/10] Pin specific versions to avoid segfault --- ci/conda_env_cpp.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index b8c792008a958..f7e7c3ce1c1d7 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -16,11 +16,11 @@ # under the License. aws-sdk-cpp=1.11.68 -azure-core-cpp>=1.10.3 -azure-identity-cpp>=1.6.0 -azure-storage-blobs-cpp>=12.10.0 -azure-storage-common-cpp>=12.5.0 -azure-storage-files-datalake-cpp>=12.9.0 +azure-core-cpp=1.10.3 +azure-identity-cpp=1.6.0 +azure-storage-blobs-cpp=12.10.0 +azure-storage-common-cpp=12.5.0 +azure-storage-files-datalake-cpp=12.9.0 benchmark>=1.6.0 boost-cpp>=1.68.0 brotli From 92bd99acf69f77adf95d0f409e82ea198e6e0c8e Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sun, 18 Feb 2024 00:46:58 +0000 Subject: [PATCH 08/10] Less restrictive pin and add a comment about the issue --- ci/conda_env_cpp.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index f7e7c3ce1c1d7..2e7b568fc53cd 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -16,11 +16,12 @@ # under the License. aws-sdk-cpp=1.11.68 -azure-core-cpp=1.10.3 -azure-identity-cpp=1.6.0 -azure-storage-blobs-cpp=12.10.0 -azure-storage-common-cpp=12.5.0 -azure-storage-files-datalake-cpp=12.9.0 +# There is a problem with the 1.11.0 conda release of azure-core-cpp https://github.com/conda-forge/admin-requests/pull/911 +azure-core-cpp>=1.10.3,<1.11.0 +azure-identity-cpp>=1.6.0 +azure-storage-blobs-cpp>=12.10.0 +azure-storage-common-cpp>=12.5.0 +azure-storage-files-datalake-cpp>=12.9.0 benchmark>=1.6.0 boost-cpp>=1.68.0 brotli From 7ed0910f8cf421db0b1ec39a377cc512c3f7a2b4 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sun, 18 Feb 2024 20:42:18 +0000 Subject: [PATCH 09/10] Correct comment typo --- ci/docker/conda-cpp.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 2b96dc5ea1767..dff1f2224809a 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,7 +42,7 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all -# Ensure nvm, node and azurite are on path. npm and node are required to install azurite, which will then need to +# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to # be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH From bccc2565adffe4aa0aec468dbd264bc382b65853 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 19 Feb 2024 00:14:04 +0000 Subject: [PATCH 10/10] Only run blob emulator --- cpp/src/arrow/filesystem/azurefs_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 42f38f1ed6ac7..e73ac8b9e4a81 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -188,7 +188,7 @@ class AzuriteEnv : public AzureEnvImpl { new AzuriteEnv("devstoreaccount1", "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" "K1SZFPTOtr/KBHBeksoGMGw==")); - auto exe_path = bp::search_path("azurite"); + auto exe_path = bp::search_path("azurite-blob"); if (exe_path.empty()) { return Status::Invalid("Could not find Azurite emulator."); }