get make test target to pass in spark lib
Signed-off-by: David Wood <dawood@us.ibm.com>
daw3rd committed May 23, 2024
1 parent d876967 commit 0a86341
Showing 4 changed files with 31 additions and 10 deletions.
8 changes: 1 addition & 7 deletions data-processing-lib/spark/Makefile
@@ -69,10 +69,4 @@ image-spark:: Dockerfile
 # pytest-forked was tried, but then we get SIGABRT in pytest when running the s3 tests, some of which are skipped..
 test::
 	@# Help: Use the already-built virtual environment to run pytest on the test directory.
-	source venv/bin/activate; export PYTHONPATH=../src; cd test; $(PYTEST) data_processing_tests/data_access;
-	source venv/bin/activate; export PYTHONPATH=../src; cd test; $(PYTEST) data_processing_tests/transform;
-	source venv/bin/activate; export PYTHONPATH=../src; cd test; $(PYTEST) data_processing_tests/launch/pure_python/launcher_test.py;
-	source venv/bin/activate; export PYTHONPATH=../src; cd test; $(PYTEST) data_processing_tests/launch/pure_python/test_noop_launch.py;
-	source venv/bin/activate; export PYTHONPATH=../src; cd test; $(PYTEST) data_processing_tests/launch/ray/ray_util_test.py;
-	source venv/bin/activate; export PYTHONPATH=../src; cd test; $(PYTEST) data_processing_tests/launch/ray/launcher_test.py;
-	source venv/bin/activate; export PYTHONPATH=../src; cd test; $(PYTEST) data_processing_tests/launch/ray/test_noop_launch.py;
+	source venv/bin/activate; export PYTHONPATH=../src; cd test; $(PYTEST) data_processing_spark_tests/launch/spark/test_noop_launch.py;
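
For reference, a minimal Python sketch of what the single remaining recipe line does, assuming it is run from data-processing-lib/spark/test with the virtual environment active (pytest's programmatic entry point stands in for the $(PYTEST) variable):

# Sketch only: replicates the one pytest invocation the test target now performs.
# Assumes the current working directory is data-processing-lib/spark/test.
import sys

import pytest

sys.path.insert(0, "../src")  # stands in for `export PYTHONPATH=../src`
sys.exit(pytest.main(["data_processing_spark_tests/launch/spark/test_noop_launch.py"]))
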
14 changes: 14 additions & 0 deletions data-processing-lib/spark/config/spark_profile_kube.yml
@@ -0,0 +1,14 @@
spark.app.name: ${APP_NAME}
spark.driver.memory: ${DRIVER_MEMORY}
spark.executor.instances: ${NUM_EXECUTORS}
spark.executor.memory: ${EXECUTOR_MEMORY}
spark.executor.cores: ${EXECUTOR_CORES}
spark.sql.shuffle.partitions: ${NUM_TASKS}
spark.task.cpus: ${TASK_CPUS}
spark.sql.legacy.parquet.nanosAsLong: true
spark.executor.decommission.forceKillTimeout: "10h"
# spark.sql.files.ignoreCorruptFiles: true
# configuration needed when running in kubernetes
spark.kubernetes.authenticate.driver.serviceAccountName: ${SERVICE_ACCOUNT}
spark.kubernetes.container.image: ${EXECUTOR_DOCKER_IMAGE}
spark.kubernetes.namespace: ${EXECUTOR_NAMESPACE}
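
The ${VAR} placeholders suggest the profile is rendered against the environment before it reaches Spark. Below is a minimal sketch of how such a file could be loaded; the load_spark_profile helper and the flat key/value layout it assumes are for illustration only, not the library's actual API:

# Illustrative only: expand ${VAR} placeholders from the environment and turn
# the flat key/value profile into a SparkConf. Variables that are not set are
# left as literal ${VAR} text by os.path.expandvars.
import os

import yaml  # PyYAML
from pyspark import SparkConf


def load_spark_profile(path: str) -> SparkConf:
    with open(path) as f:
        expanded = os.path.expandvars(f.read())
    conf = SparkConf()
    for key, value in yaml.safe_load(expanded).items():
        conf.set(key, str(value))
    return conf
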
8 changes: 8 additions & 0 deletions data-processing-lib/spark/config/spark_profile_local.yml
@@ -0,0 +1,8 @@
spark.app.name: cma-spark-driver-test-local
spark.driver.memory: 1g
spark.executor.instances: 2
spark.executor.memory: 1g
spark.executor.cores: 1
spark.sql.shuffle.partitions: 2
spark.task.cpus: 1
spark.sql.legacy.parquet.nanosAsLong: true
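
Reusing the hypothetical load_spark_profile helper sketched above, the local profile (which has no placeholders to expand) could seed a session like this:

# Illustrative usage; the relative path assumes the working directory is
# data-processing-lib/spark.
from pyspark.sql import SparkSession

conf = load_spark_profile("config/spark_profile_local.yml")
spark = SparkSession.builder.config(conf=conf).getOrCreate()
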
11 changes: 8 additions & 3 deletions data-processing-lib/spark/test/data_processing_spark_tests/launch/spark/test_noop_launch.py
@@ -22,8 +22,13 @@ class TestRayNOOPTransform(AbstractTransformLauncherTest):
"""

def get_test_transform_fixtures(self) -> list[tuple]:
basedir = "../../../../test-data/data_processing/spark/noop/"
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), basedir))
proj_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../.."))
test_data_dir = os.path.join(proj_dir, "test-data/data_processing/spark/noop/")
config_path = os.path.join(proj_dir, "config/spark_profile_local.yml")
launcher = SparkTransformLauncher(NOOPSparkRuntimeConfiguration())
fixtures = [(launcher, {"noop_sleep_sec": 0}, basedir + "/input", basedir + "/expected")]
cli_params = {
"noop_sleep_sec": 0,
"spark_local_config_filepath": config_path,
}
fixtures = [(launcher, cli_params, test_data_dir + "/input", test_data_dir + "/expected")]
return fixtures
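
The four ../ segments climb from the test module back to the project root. A worked example of the resolution, with the file location inferred from the Makefile's test path:

# __file__   -> .../data-processing-lib/spark/test/data_processing_spark_tests/launch/spark/test_noop_launch.py
# dirname    -> .../spark/test/data_processing_spark_tests/launch/spark
# ../../../..  climbs out of launch/spark, launch, data_processing_spark_tests, and test
# proj_dir   -> .../data-processing-lib/spark
# config_path -> .../data-processing-lib/spark/config/spark_profile_local.yml
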
